[SCSI] mpt2sas: Support for stopping driver when Firmware encounters

Added command line option and shost sysfs attribute called
mpt2sas_fwfault_debug. When enduser writes a "1" to this parameter, this
will enable support in the driver for debugging firmware timeout related
issues.  This handling was added in three areas (a) scsi error handling
callback called task_abort, (b) IOCTL interface, and (c) other timeouts that
result in diag resets, such as manufacturing config pages.  When this
support is enabled, the driver will provide dump_stack to console, halt
controller firmware, and panic driver. The end user probably would want to
setup serial console redirection so the dump stack can be seen.

Here are the three methods for enable this support:

(a) # insmod mpt2sas.ko mpt2sas_fwfault_debug=1
(b) # echo 1 > /sys/module/mpt2sas/parameters/mpt2sas_fwfault_debug
(c) # echo 1 > /sys/class/scsi_host/host#/fwfault_debug  (where # is
the host number)

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: Eric Moore <Eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
This commit is contained in:
Kashyap, Desai 2009-09-23 17:26:58 +05:30 committed by James Bottomley
parent 9fec5f9fc2
commit fa7f316735
4 changed files with 119 additions and 16 deletions

View file

@ -77,6 +77,32 @@ static int msix_disable = -1;
module_param(msix_disable, int, 0);
MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");
int mpt2sas_fwfault_debug;
MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault "
"and halt firmware - (default=0)");
/**
* _scsih_set_fwfault_debug - global setting of ioc->fwfault_debug.
*
*/
static int
_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT2SAS_ADAPTER *ioc;
if (ret)
return ret;
printk(KERN_INFO "setting logging_level(0x%08x)\n",
mpt2sas_fwfault_debug);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
ioc->fwfault_debug = mpt2sas_fwfault_debug;
return 0;
}
module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug,
param_get_int, &mpt2sas_fwfault_debug, 0644);
/**
* _base_fault_reset_work - workq handling ioc fault conditions
* @work: input argument, used to derive ioc
@ -177,6 +203,51 @@ mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc)
}
}
/**
* mpt2sas_base_fault_info - verbose translation of firmware FAULT code
* @ioc: per adapter object
* @fault_code: fault code
*
* Return nothing.
*/
void
mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
{
printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
ioc->name, fault_code);
}
/**
* mpt2sas_halt_firmware - halt's mpt controller firmware
* @ioc: per adapter object
*
* For debugging timeout related issues. Writing 0xCOFFEE00
* to the doorbell register will halt controller firmware. With
* the purpose to stop both driver and firmware, the enduser can
* obtain a ring buffer from controller UART.
*/
void
mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc)
{
u32 doorbell;
if (!ioc->fwfault_debug)
return;
dump_stack();
doorbell = readl(&ioc->chip->Doorbell);
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT)
mpt2sas_base_fault_info(ioc , doorbell);
else {
writel(0xC0FFEE00, &ioc->chip->Doorbell);
printk(MPT2SAS_ERR_FMT "Firmware is halted due to command "
"timeout\n", ioc->name);
}
panic("panic in %s\n", __func__);
}
#ifdef CONFIG_SCSI_MPT2SAS_LOGGING
/**
* _base_sas_ioc_info - verbose translation of the ioc status
@ -525,20 +596,6 @@ _base_sas_log_info(struct MPT2SAS_ADAPTER *ioc , u32 log_info)
sas_loginfo.dw.subcode);
}
/**
* mpt2sas_base_fault_info - verbose translation of firmware FAULT code
* @ioc: pointer to scsi command object
* @fault_code: fault code
*
* Return nothing.
*/
void
mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
{
printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
ioc->name, fault_code);
}
/**
* _base_display_reply_info -
* @ioc: pointer to scsi command object
@ -3684,6 +3741,9 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag,
dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: enter\n", ioc->name,
__func__));
if (mpt2sas_fwfault_debug)
mpt2sas_halt_firmware(ioc);
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
if (ioc->shost_recovery) {
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);

View file

@ -466,6 +466,7 @@ typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
* @chip_phys: physical addrss prior to mapping
* @pio_chip: I/O mapped register space
* @logging_level: see mpt2sas_debug.h
* @fwfault_debug: debuging FW timeouts
* @ir_firmware: IR firmware present
* @bars: bitmask of BAR's that must be configured
* @mask_interrupts: ignore interrupt
@ -587,6 +588,7 @@ struct MPT2SAS_ADAPTER {
unsigned long chip_phys;
unsigned long pio_chip;
int logging_level;
int fwfault_debug;
u8 ir_firmware;
int bars;
u8 mask_interrupts;
@ -803,6 +805,8 @@ int mpt2sas_base_scsi_enclosure_processor(struct MPT2SAS_ADAPTER *ioc,
Mpi2SepReply_t *mpi_reply, Mpi2SepRequest_t *mpi_request);
void mpt2sas_base_validate_event_type(struct MPT2SAS_ADAPTER *ioc, u32 *event_type);
void mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc);
/* scsih shared API */
u8 mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
u32 reply);

View file

@ -896,6 +896,7 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
printk(MPT2SAS_INFO_FMT "issue target reset: handle "
"= (0x%04x)\n", ioc->name,
mpi_request->FunctionDependent1);
mpt2sas_halt_firmware(ioc);
mutex_lock(&ioc->tm_cmds.mutex);
mpt2sas_scsih_issue_tm(ioc,
mpi_request->FunctionDependent1, 0,
@ -2474,6 +2475,43 @@ _ctl_logging_level_store(struct device *cdev, struct device_attribute *attr,
static DEVICE_ATTR(logging_level, S_IRUGO | S_IWUSR,
_ctl_logging_level_show, _ctl_logging_level_store);
/* device attributes */
/*
* _ctl_fwfault_debug_show - show/store fwfault_debug
* @cdev - pointer to embedded class device
* @buf - the buffer returned
*
* mpt2sas_fwfault_debug is command line option
* A sysfs 'read/write' shost attribute.
*/
static ssize_t
_ctl_fwfault_debug_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
return snprintf(buf, PAGE_SIZE, "%d\n", ioc->fwfault_debug);
}
static ssize_t
_ctl_fwfault_debug_store(struct device *cdev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
int val = 0;
if (sscanf(buf, "%d", &val) != 1)
return -EINVAL;
ioc->fwfault_debug = val;
printk(MPT2SAS_INFO_FMT "fwfault_debug=%d\n", ioc->name,
ioc->fwfault_debug);
return strlen(buf);
}
static DEVICE_ATTR(fwfault_debug, S_IRUGO | S_IWUSR,
_ctl_fwfault_debug_show, _ctl_fwfault_debug_store);
struct device_attribute *mpt2sas_host_attrs[] = {
&dev_attr_version_fw,
&dev_attr_version_bios,
@ -2487,13 +2525,12 @@ struct device_attribute *mpt2sas_host_attrs[] = {
&dev_attr_io_delay,
&dev_attr_device_delay,
&dev_attr_logging_level,
&dev_attr_fwfault_debug,
&dev_attr_fw_queue_depth,
&dev_attr_host_sas_address,
NULL,
};
/* device attributes */
/**
* _ctl_device_sas_address_show - sas address
* @cdev - pointer to embedded class device

View file

@ -1929,6 +1929,8 @@ _scsih_abort(struct scsi_cmnd *scmd)
goto out;
}
mpt2sas_halt_firmware(ioc);
mutex_lock(&ioc->tm_cmds.mutex);
handle = sas_device_priv_data->sas_target->handle;
mpt2sas_scsih_issue_tm(ioc, handle, sas_device_priv_data->lun,