[SCSI] qla1280: error recovery rewrite

The driver now waits for the scsi commands associated with a
particular error recovery step to be returned to the mid-layer,
and returns the appropriate SUCCESS or FAILED status.  Removes
unneeded polling of chip for interrupts.

This patch also bumps the driver version number.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
This commit is contained in:
Michael Reed 2009-04-08 14:34:33 -05:00 committed by James Bottomley
parent fd65e5e93c
commit 413e6e18b4
2 changed files with 161 additions and 136 deletions

View file

@ -17,9 +17,12 @@
* General Public License for more details. * General Public License for more details.
* *
******************************************************************************/ ******************************************************************************/
#define QLA1280_VERSION "3.26" #define QLA1280_VERSION "3.27"
/***************************************************************************** /*****************************************************************************
Revision History: Revision History:
Rev 3.27, February 10, 2009, Michael Reed
- General code cleanup.
- Improve error recovery.
Rev 3.26, January 16, 2006 Jes Sorensen Rev 3.26, January 16, 2006 Jes Sorensen
- Ditch all < 2.6 support - Ditch all < 2.6 support
Rev 3.25.1, February 10, 2005 Christoph Hellwig Rev 3.25.1, February 10, 2005 Christoph Hellwig
@ -718,6 +721,8 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
cmd->scsi_done = fn; cmd->scsi_done = fn;
sp->cmd = cmd; sp->cmd = cmd;
sp->flags = 0; sp->flags = 0;
sp->wait = NULL;
CMD_HANDLE(cmd) = (unsigned char *)NULL;
qla1280_print_scsi_cmd(5, cmd); qla1280_print_scsi_cmd(5, cmd);
@ -742,14 +747,6 @@ enum action {
ADAPTER_RESET, ADAPTER_RESET,
}; };
/*
 * Timer action for the error action processor.
 *
 * __data carries a struct scsi_cmnd pointer cast to unsigned long.  The
 * callback looks up the srb attached to that command and signals the
 * completion stored in sp->wait.
 */
static void qla1280_error_wait_timeout(unsigned long __data)
{
struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data;
struct srb *sp = (struct srb *)CMD_SP(cmd);
complete(sp->wait);
}
static void qla1280_mailbox_timeout(unsigned long __data) static void qla1280_mailbox_timeout(unsigned long __data)
{ {
@ -764,6 +761,65 @@ static void qla1280_mailbox_timeout(unsigned long __data)
complete(ha->mailbox_wait); complete(ha->mailbox_wait);
} }
/*
 * _qla1280_wait_for_single_command
 *	Wait, for at most 4 seconds, on the supplied completion and then
 *	report whether the command attached to the srb has been completed.
 *
 * Input:
 *	ha   = adapter block pointer.
 *	sp   = srb of the command being waited for.
 *	wait = completion to sleep on.
 *
 * Returns:
 *	SUCCESS if the command's handle is COMPLETED_HANDLE after the wait
 *	(the command is then handed back through its scsi_done callback),
 *	FAILED otherwise.
 *
 * Note:
 *	Entered with the host_lock held; the lock is dropped for the
 *	duration of the sleep and retaken before sp->wait is cleared.
 */
static int
_qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp,
				 struct completion *wait)
{
	struct scsi_cmnd *cmd = sp->cmd;

	spin_unlock_irq(ha->host->host_lock);
	wait_for_completion_timeout(wait, 4*HZ);
	spin_lock_irq(ha->host->host_lock);

	/* Nobody may signal the (caller-owned) completion from here on. */
	sp->wait = NULL;

	/* The handle, not the wait result, decides the outcome. */
	if (CMD_HANDLE(cmd) != COMPLETED_HANDLE)
		return FAILED;

	(*cmd->scsi_done)(cmd);
	return SUCCESS;
}
static int
qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp)
{
DECLARE_COMPLETION_ONSTACK(wait);
sp->wait = &wait;
return _qla1280_wait_for_single_command(ha, sp, &wait);
}
/*
 * qla1280_wait_for_pending_commands
 *	Walk the outstanding command array and wait for every command that
 *	matches the designated bus/target to be completed by the firmware.
 *
 * Input:
 *	ha     = adapter block pointer.
 *	bus    = bus to match; a negative value matches every bus.
 *	target = target to match; a negative value matches every target.
 *
 * Returns:
 *	SUCCESS when no matching command is outstanding or every wait
 *	succeeded; otherwise the scan stops at the first wait that returns
 *	FAILED and FAILED is returned.
 */
static int
qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target)
{
	int slot;
	int status = SUCCESS;

	for (slot = 0; slot < MAX_OUTSTANDING_COMMANDS; slot++) {
		struct srb *sp = ha->outstanding_cmds[slot];
		struct scsi_cmnd *cmd;

		/* Empty slot, nothing to wait for. */
		if (!sp)
			continue;

		cmd = sp->cmd;

		/* Skip commands outside the requested bus/target scope. */
		if (bus >= 0 && SCSI_BUS_32(cmd) != bus)
			continue;
		if (target >= 0 && SCSI_TCN_32(cmd) != target)
			continue;

		status = qla1280_wait_for_single_command(ha, sp);
		if (status == FAILED)
			break;
	}

	return status;
}
/************************************************************************** /**************************************************************************
* qla1280_error_action * qla1280_error_action
* The function will attempt to perform a specified error action and * The function will attempt to perform a specified error action and
@ -777,11 +833,6 @@ static void qla1280_mailbox_timeout(unsigned long __data)
* Returns: * Returns:
* SUCCESS or FAILED * SUCCESS or FAILED
* *
* Note:
* Resetting the bus always succeeds - it has to, otherwise the
* kernel will panic! Try a surgical technique - sending a BUS
* DEVICE RESET message - on the offending target before pulling
* the SCSI bus reset line.
**************************************************************************/ **************************************************************************/
static int static int
qla1280_error_action(struct scsi_cmnd *cmd, enum action action) qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
@ -789,15 +840,19 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
struct scsi_qla_host *ha; struct scsi_qla_host *ha;
int bus, target, lun; int bus, target, lun;
struct srb *sp; struct srb *sp;
uint16_t data; int i, found;
unsigned char *handle; int result=FAILED;
int result, i; int wait_for_bus=-1;
int wait_for_target = -1;
DECLARE_COMPLETION_ONSTACK(wait); DECLARE_COMPLETION_ONSTACK(wait);
struct timer_list timer;
ENTER("qla1280_error_action"); ENTER("qla1280_error_action");
ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata); ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata);
sp = (struct srb *)CMD_SP(cmd);
bus = SCSI_BUS_32(cmd);
target = SCSI_TCN_32(cmd);
lun = SCSI_LUN_32(cmd);
dprintk(4, "error_action %i, istatus 0x%04x\n", action, dprintk(4, "error_action %i, istatus 0x%04x\n", action,
RD_REG_WORD(&ha->iobase->istatus)); RD_REG_WORD(&ha->iobase->istatus));
@ -811,73 +866,42 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
"Handle=0x%p, action=0x%x\n", "Handle=0x%p, action=0x%x\n",
ha->host_no, cmd, CMD_HANDLE(cmd), action); ha->host_no, cmd, CMD_HANDLE(cmd), action);
sp = (struct srb *)CMD_SP(cmd);
handle = CMD_HANDLE(cmd);
/* Check for pending interrupts. */
data = qla1280_debounce_register(&ha->iobase->istatus);
/* /*
* The io_request_lock is held when the reset handler is called, hence * Check to see if we have the command in the outstanding_cmds[]
* the interrupt handler cannot be running in parallel as it also * array. If not then it must have completed before this error
* grabs the lock. /Jes * action was initiated. If the error_action isn't ABORT_COMMAND
* then the driver must proceed with the requested action.
*/ */
if (data & RISC_INT) found = -1;
qla1280_isr(ha, &ha->done_q);
/*
* Determine the suggested action that the mid-level driver wants
* us to perform.
*/
if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) {
if(action == ABORT_COMMAND) {
/* we never got this command */
printk(KERN_INFO "qla1280: Aborting a NULL handle\n");
return SUCCESS; /* no action - we don't have command */
}
} else {
sp->wait = &wait;
}
bus = SCSI_BUS_32(cmd);
target = SCSI_TCN_32(cmd);
lun = SCSI_LUN_32(cmd);
/* Overloading result. Here it means the success or fail of the
* *issue* of the action. When we return from the routine, it must
* mean the actual success or fail of the action */
result = FAILED;
switch (action) {
case ABORT_COMMAND:
if ((sp->flags & SRB_ABORT_PENDING)) {
printk(KERN_WARNING
"scsi(): Command has a pending abort "
"message - ABORT_PENDING.\n");
/* This should technically be impossible since we
* now wait for abort completion */
break;
}
for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) { for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
if (sp == ha->outstanding_cmds[i]) { if (sp == ha->outstanding_cmds[i]) {
dprintk(1, "qla1280: RISC aborting command\n"); found = i;
if (qla1280_abort_command(ha, sp, i) == 0) sp->wait = &wait; /* we'll wait for it to complete */
break;
}
}
if (found < 0) { /* driver doesn't have command */
result = SUCCESS; result = SUCCESS;
else { if (qla1280_verbose) {
printk(KERN_INFO
"scsi(%ld:%d:%d:%d): specified command has "
"already completed.\n", ha->host_no, bus,
target, lun);
}
}
switch (action) {
case ABORT_COMMAND:
dprintk(1, "qla1280: RISC aborting command\n");
/* /*
* Since we don't know what might * The abort might fail due to race when the host_lock
* have happened to the command, it * is released to issue the abort. As such, we
* is unsafe to remove it from the * don't bother to check the return status.
* device's queue at this point.
* Wait and let the escalation
* process take care of it.
*/ */
printk(KERN_WARNING if (found >= 0)
"scsi(%li:%i:%i:%i): Unable" qla1280_abort_command(ha, sp, found);
" to abort command!\n",
ha->host_no, bus, target, lun);
}
}
}
break; break;
case DEVICE_RESET: case DEVICE_RESET:
@ -885,16 +909,21 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
printk(KERN_INFO printk(KERN_INFO
"scsi(%ld:%d:%d:%d): Queueing device reset " "scsi(%ld:%d:%d:%d): Queueing device reset "
"command.\n", ha->host_no, bus, target, lun); "command.\n", ha->host_no, bus, target, lun);
if (qla1280_device_reset(ha, bus, target) == 0) if (qla1280_device_reset(ha, bus, target) == 0) {
result = SUCCESS; /* issued device reset, set wait conditions */
wait_for_bus = bus;
wait_for_target = target;
}
break; break;
case BUS_RESET: case BUS_RESET:
if (qla1280_verbose) if (qla1280_verbose)
printk(KERN_INFO "qla1280(%ld:%d): Issued bus " printk(KERN_INFO "qla1280(%ld:%d): Issued bus "
"reset.\n", ha->host_no, bus); "reset.\n", ha->host_no, bus);
if (qla1280_bus_reset(ha, bus) == 0) if (qla1280_bus_reset(ha, bus) == 0) {
result = SUCCESS; /* issued bus reset, set wait conditions */
wait_for_bus = bus;
}
break; break;
case ADAPTER_RESET: case ADAPTER_RESET:
@ -907,55 +936,48 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
"continue automatically\n", ha->host_no); "continue automatically\n", ha->host_no);
} }
ha->flags.reset_active = 1; ha->flags.reset_active = 1;
/*
* We restarted all of the commands automatically, so the if (qla1280_abort_isp(ha) != 0) { /* it's dead */
* mid-level code can expect completions momentarily. result = FAILED;
*/ }
if (qla1280_abort_isp(ha) == 0)
result = SUCCESS;
ha->flags.reset_active = 0; ha->flags.reset_active = 0;
} }
if (!list_empty(&ha->done_q))
qla1280_done(ha);
/* If we didn't manage to issue the action, or we have no
* command to wait for, exit here */
if (result == FAILED || handle == NULL ||
handle == (unsigned char *)INVALID_HANDLE) {
/* /*
* Clear completion queue to avoid qla1280_done() trying * At this point, the host_lock has been released and retaken
* to complete the command at a later stage after we * by the issuance of the mailbox command.
* have exited the current context * Wait for the command passed in by the mid-layer if it
* was found by the driver. It might have been returned
* between eh recovery steps, hence the check of the "found"
* variable.
*/ */
sp->wait = NULL;
goto leave; if (found >= 0)
result = _qla1280_wait_for_single_command(ha, sp, &wait);
if (action == ABORT_COMMAND && result != SUCCESS) {
printk(KERN_WARNING
"scsi(%li:%i:%i:%i): "
"Unable to abort command!\n",
ha->host_no, bus, target, lun);
} }
/* set up a timer just in case we're really jammed */ /*
init_timer(&timer); * If the command passed in by the mid-layer has been
timer.expires = jiffies + 4*HZ; * returned by the board, then wait for any additional
timer.data = (unsigned long)cmd; * commands which are supposed to complete based upon
timer.function = qla1280_error_wait_timeout; * the error action.
add_timer(&timer); *
* All commands are unconditionally returned during a
/* wait for the action to complete (or the timer to expire) */ * call to qla1280_abort_isp(), ADAPTER_RESET. No need
spin_unlock_irq(ha->host->host_lock); * to wait for them.
wait_for_completion(&wait); */
del_timer_sync(&timer); if (result == SUCCESS && wait_for_bus >= 0) {
spin_lock_irq(ha->host->host_lock); result = qla1280_wait_for_pending_commands(ha,
sp->wait = NULL; wait_for_bus, wait_for_target);
/* the only action we might get a fail for is abort */
if (action == ABORT_COMMAND) {
if(sp->flags & SRB_ABORTED)
result = SUCCESS;
else
result = FAILED;
} }
leave:
dprintk(1, "RESET returning %d\n", result); dprintk(1, "RESET returning %d\n", result);
LEAVE("qla1280_error_action"); LEAVE("qla1280_error_action");
@ -1258,6 +1280,7 @@ qla1280_done(struct scsi_qla_host *ha)
switch ((CMD_RESULT(cmd) >> 16)) { switch ((CMD_RESULT(cmd) >> 16)) {
case DID_RESET: case DID_RESET:
/* Issue marker command. */ /* Issue marker command. */
if (!ha->flags.abort_isp_active)
qla1280_marker(ha, bus, target, 0, MK_SYNC_ID); qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
break; break;
case DID_ABORT: case DID_ABORT:
@ -1272,12 +1295,11 @@ qla1280_done(struct scsi_qla_host *ha)
scsi_dma_unmap(cmd); scsi_dma_unmap(cmd);
/* Call the mid-level driver interrupt handler */ /* Call the mid-level driver interrupt handler */
CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE;
ha->actthreads--; ha->actthreads--;
if (sp->wait == NULL)
(*(cmd)->scsi_done)(cmd); (*(cmd)->scsi_done)(cmd);
else
if(sp->wait != NULL)
complete(sp->wait); complete(sp->wait);
} }
LEAVE("qla1280_done"); LEAVE("qla1280_done");
@ -3415,6 +3437,7 @@ qla1280_isr(struct scsi_qla_host *ha, struct list_head *done_q)
/* Save ISP completion status */ /* Save ISP completion status */
CMD_RESULT(sp->cmd) = 0; CMD_RESULT(sp->cmd) = 0;
CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
/* Place block on done queue */ /* Place block on done queue */
list_add_tail(&sp->list, done_q); list_add_tail(&sp->list, done_q);
@ -3681,6 +3704,8 @@ qla1280_status_entry(struct scsi_qla_host *ha, struct response *pkt,
} }
} }
CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
/* Place command on done queue. */ /* Place command on done queue. */
list_add_tail(&sp->list, done_q); list_add_tail(&sp->list, done_q);
out: out:
@ -3736,6 +3761,8 @@ qla1280_error_entry(struct scsi_qla_host *ha, struct response *pkt,
CMD_RESULT(sp->cmd) = DID_ERROR << 16; CMD_RESULT(sp->cmd) = DID_ERROR << 16;
} }
CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
/* Place command on done queue. */ /* Place command on done queue. */
list_add_tail(&sp->list, done_q); list_add_tail(&sp->list, done_q);
} }
@ -3786,19 +3813,16 @@ qla1280_abort_isp(struct scsi_qla_host *ha)
struct scsi_cmnd *cmd; struct scsi_cmnd *cmd;
sp = ha->outstanding_cmds[cnt]; sp = ha->outstanding_cmds[cnt];
if (sp) { if (sp) {
cmd = sp->cmd; cmd = sp->cmd;
CMD_RESULT(cmd) = DID_RESET << 16; CMD_RESULT(cmd) = DID_RESET << 16;
CMD_HANDLE(cmd) = COMPLETED_HANDLE;
sp->cmd = NULL;
ha->outstanding_cmds[cnt] = NULL; ha->outstanding_cmds[cnt] = NULL;
list_add_tail(&sp->list, &ha->done_q);
(*cmd->scsi_done)(cmd);
sp->flags = 0;
} }
} }
qla1280_done(ha);
status = qla1280_load_firmware(ha); status = qla1280_load_firmware(ha);
if (status) if (status)
goto out; goto out;

View file

@ -88,7 +88,8 @@
/* Maximum outstanding commands in ISP queues */ /* Maximum outstanding commands in ISP queues */
#define MAX_OUTSTANDING_COMMANDS 512 #define MAX_OUTSTANDING_COMMANDS 512
#define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS + 2) #define COMPLETED_HANDLE ((unsigned char *) \
(MAX_OUTSTANDING_COMMANDS + 2))
/* ISP request and response entry counts (37-65535) */ /* ISP request and response entry counts (37-65535) */
#define REQUEST_ENTRY_CNT 255 /* Number of request entries. */ #define REQUEST_ENTRY_CNT 255 /* Number of request entries. */