From 413e6e18b483de272bdafa56e5c086c75f11d681 Mon Sep 17 00:00:00 2001 From: Michael Reed Date: Wed, 8 Apr 2009 14:34:33 -0500 Subject: [PATCH] [SCSI] qla1280: error recovery rewrite The driver now waits for the scsi commands associated with a particular error recovery step to be returned to the mid-layer, and returns the appropriate SUCCESS or FAILED status. Removes unneeded polling of chip for interrupts. This patch also bumps the driver version number. Signed-off-by: Michael Reed Signed-off-by: James Bottomley --- drivers/scsi/qla1280.c | 294 ++++++++++++++++++++++------------------- drivers/scsi/qla1280.h | 3 +- 2 files changed, 161 insertions(+), 136 deletions(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 0cbad4982db..8371d917a9a 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -17,9 +17,12 @@ * General Public License for more details. * ******************************************************************************/ -#define QLA1280_VERSION "3.26" +#define QLA1280_VERSION "3.27" /***************************************************************************** Revision History: + Rev 3.27, February 10, 2009, Michael Reed + - General code cleanup. + - Improve error recovery. Rev 3.26, January 16, 2006 Jes Sorensen - Ditch all < 2.6 support Rev 3.25.1, February 10, 2005 Christoph Hellwig @@ -718,6 +721,8 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *)) cmd->scsi_done = fn; sp->cmd = cmd; sp->flags = 0; + sp->wait = NULL; + CMD_HANDLE(cmd) = (unsigned char *)NULL; qla1280_print_scsi_cmd(5, cmd); @@ -742,14 +747,6 @@ enum action { ADAPTER_RESET, }; -/* timer action for error action processor */ -static void qla1280_error_wait_timeout(unsigned long __data) -{ - struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data; - struct srb *sp = (struct srb *)CMD_SP(cmd); - - complete(sp->wait); -} static void qla1280_mailbox_timeout(unsigned long __data) { @@ -764,6 +761,65 @@ static void qla1280_mailbox_timeout(unsigned long __data) complete(ha->mailbox_wait); } +static int +_qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp, + struct completion *wait) +{ + int status = FAILED; + struct scsi_cmnd *cmd = sp->cmd; + + spin_unlock_irq(ha->host->host_lock); + wait_for_completion_timeout(wait, 4*HZ); + spin_lock_irq(ha->host->host_lock); + sp->wait = NULL; + if(CMD_HANDLE(cmd) == COMPLETED_HANDLE) { + status = SUCCESS; + (*cmd->scsi_done)(cmd); + } + return status; +} + +static int +qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp) +{ + DECLARE_COMPLETION_ONSTACK(wait); + + sp->wait = &wait; + return _qla1280_wait_for_single_command(ha, sp, &wait); +} + +static int +qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target) +{ + int cnt; + int status; + struct srb *sp; + struct scsi_cmnd *cmd; + + status = SUCCESS; + + /* + * Wait for all commands with the designated bus/target + * to be completed by the firmware + */ + for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + sp = ha->outstanding_cmds[cnt]; + if (sp) { + cmd = sp->cmd; + + if (bus >= 0 && SCSI_BUS_32(cmd) != bus) + continue; + if (target >= 0 && SCSI_TCN_32(cmd) != target) + continue; + + status = qla1280_wait_for_single_command(ha, sp); + if (status == FAILED) + break; + } + } + return status; +} + /************************************************************************** * qla1280_error_action * The function will attempt to perform a specified error action and @@ -777,11 +833,6 @@ static void qla1280_mailbox_timeout(unsigned long __data) * Returns: * SUCCESS or FAILED * - * Note: - * Resetting the bus always succeeds - is has to, otherwise the - * kernel will panic! Try a surgical technique - sending a BUS - * DEVICE RESET message - on the offending target before pulling - * the SCSI bus reset line. **************************************************************************/ static int qla1280_error_action(struct scsi_cmnd *cmd, enum action action) @@ -789,15 +840,19 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action) struct scsi_qla_host *ha; int bus, target, lun; struct srb *sp; - uint16_t data; - unsigned char *handle; - int result, i; + int i, found; + int result=FAILED; + int wait_for_bus=-1; + int wait_for_target = -1; DECLARE_COMPLETION_ONSTACK(wait); - struct timer_list timer; ENTER("qla1280_error_action"); ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata); + sp = (struct srb *)CMD_SP(cmd); + bus = SCSI_BUS_32(cmd); + target = SCSI_TCN_32(cmd); + lun = SCSI_LUN_32(cmd); dprintk(4, "error_action %i, istatus 0x%04x\n", action, RD_REG_WORD(&ha->iobase->istatus)); @@ -811,73 +866,42 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action) "Handle=0x%p, action=0x%x\n", ha->host_no, cmd, CMD_HANDLE(cmd), action); - sp = (struct srb *)CMD_SP(cmd); - handle = CMD_HANDLE(cmd); - - /* Check for pending interrupts. */ - data = qla1280_debounce_register(&ha->iobase->istatus); /* - * The io_request_lock is held when the reset handler is called, hence - * the interrupt handler cannot be running in parallel as it also - * grabs the lock. /Jes + * Check to see if we have the command in the outstanding_cmds[] + * array. If not then it must have completed before this error + * action was initiated. If the error_action isn't ABORT_COMMAND + * then the driver must proceed with the requested action. */ - if (data & RISC_INT) - qla1280_isr(ha, &ha->done_q); - - /* - * Determine the suggested action that the mid-level driver wants - * us to perform. - */ - if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) { - if(action == ABORT_COMMAND) { - /* we never got this command */ - printk(KERN_INFO "qla1280: Aborting a NULL handle\n"); - return SUCCESS; /* no action - we don't have command */ - } - } else { - sp->wait = &wait; - } - - bus = SCSI_BUS_32(cmd); - target = SCSI_TCN_32(cmd); - lun = SCSI_LUN_32(cmd); - - /* Overloading result. Here it means the success or fail of the - * *issue* of the action. When we return from the routine, it must - * mean the actual success or fail of the action */ - result = FAILED; - switch (action) { - case ABORT_COMMAND: - if ((sp->flags & SRB_ABORT_PENDING)) { - printk(KERN_WARNING - "scsi(): Command has a pending abort " - "message - ABORT_PENDING.\n"); - /* This should technically be impossible since we - * now wait for abort completion */ + found = -1; + for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) { + if (sp == ha->outstanding_cmds[i]) { + found = i; + sp->wait = &wait; /* we'll wait for it to complete */ break; } + } - for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) { - if (sp == ha->outstanding_cmds[i]) { - dprintk(1, "qla1280: RISC aborting command\n"); - if (qla1280_abort_command(ha, sp, i) == 0) - result = SUCCESS; - else { - /* - * Since we don't know what might - * have happend to the command, it - * is unsafe to remove it from the - * device's queue at this point. - * Wait and let the escalation - * process take care of it. - */ - printk(KERN_WARNING - "scsi(%li:%i:%i:%i): Unable" - " to abort command!\n", - ha->host_no, bus, target, lun); - } - } + if (found < 0) { /* driver doesn't have command */ + result = SUCCESS; + if (qla1280_verbose) { + printk(KERN_INFO + "scsi(%ld:%d:%d:%d): specified command has " + "already completed.\n", ha->host_no, bus, + target, lun); } + } + + switch (action) { + + case ABORT_COMMAND: + dprintk(1, "qla1280: RISC aborting command\n"); + /* + * The abort might fail due to race when the host_lock + * is released to issue the abort. As such, we + * don't bother to check the return status. + */ + if (found >= 0) + qla1280_abort_command(ha, sp, found); break; case DEVICE_RESET: @@ -885,16 +909,21 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action) printk(KERN_INFO "scsi(%ld:%d:%d:%d): Queueing device reset " "command.\n", ha->host_no, bus, target, lun); - if (qla1280_device_reset(ha, bus, target) == 0) - result = SUCCESS; + if (qla1280_device_reset(ha, bus, target) == 0) { + /* issued device reset, set wait conditions */ + wait_for_bus = bus; + wait_for_target = target; + } break; case BUS_RESET: if (qla1280_verbose) printk(KERN_INFO "qla1280(%ld:%d): Issued bus " "reset.\n", ha->host_no, bus); - if (qla1280_bus_reset(ha, bus) == 0) - result = SUCCESS; + if (qla1280_bus_reset(ha, bus) == 0) { + /* issued bus reset, set wait conditions */ + wait_for_bus = bus; + } break; case ADAPTER_RESET: @@ -907,55 +936,48 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action) "continue automatically\n", ha->host_no); } ha->flags.reset_active = 1; - /* - * We restarted all of the commands automatically, so the - * mid-level code can expect completions momentitarily. - */ - if (qla1280_abort_isp(ha) == 0) - result = SUCCESS; + + if (qla1280_abort_isp(ha) != 0) { /* it's dead */ + result = FAILED; + } ha->flags.reset_active = 0; } - if (!list_empty(&ha->done_q)) - qla1280_done(ha); + /* + * At this point, the host_lock has been released and retaken + * by the issuance of the mailbox command. + * Wait for the command passed in by the mid-layer if it + * was found by the driver. It might have been returned + * between eh recovery steps, hence the check of the "found" + * variable. + */ - /* If we didn't manage to issue the action, or we have no - * command to wait for, exit here */ - if (result == FAILED || handle == NULL || - handle == (unsigned char *)INVALID_HANDLE) { - /* - * Clear completion queue to avoid qla1280_done() trying - * to complete the command at a later stage after we - * have exited the current context - */ - sp->wait = NULL; - goto leave; + if (found >= 0) + result = _qla1280_wait_for_single_command(ha, sp, &wait); + + if (action == ABORT_COMMAND && result != SUCCESS) { + printk(KERN_WARNING + "scsi(%li:%i:%i:%i): " + "Unable to abort command!\n", + ha->host_no, bus, target, lun); } - /* set up a timer just in case we're really jammed */ - init_timer(&timer); - timer.expires = jiffies + 4*HZ; - timer.data = (unsigned long)cmd; - timer.function = qla1280_error_wait_timeout; - add_timer(&timer); - - /* wait for the action to complete (or the timer to expire) */ - spin_unlock_irq(ha->host->host_lock); - wait_for_completion(&wait); - del_timer_sync(&timer); - spin_lock_irq(ha->host->host_lock); - sp->wait = NULL; - - /* the only action we might get a fail for is abort */ - if (action == ABORT_COMMAND) { - if(sp->flags & SRB_ABORTED) - result = SUCCESS; - else - result = FAILED; + /* + * If the command passed in by the mid-layer has been + * returned by the board, then wait for any additional + * commands which are supposed to complete based upon + * the error action. + * + * All commands are unconditionally returned during a + * call to qla1280_abort_isp(), ADAPTER_RESET. No need + * to wait for them. + */ + if (result == SUCCESS && wait_for_bus >= 0) { + result = qla1280_wait_for_pending_commands(ha, + wait_for_bus, wait_for_target); } - leave: dprintk(1, "RESET returning %d\n", result); LEAVE("qla1280_error_action"); @@ -1258,7 +1280,8 @@ qla1280_done(struct scsi_qla_host *ha) switch ((CMD_RESULT(cmd) >> 16)) { case DID_RESET: /* Issue marker command. */ - qla1280_marker(ha, bus, target, 0, MK_SYNC_ID); + if (!ha->flags.abort_isp_active) + qla1280_marker(ha, bus, target, 0, MK_SYNC_ID); break; case DID_ABORT: sp->flags &= ~SRB_ABORT_PENDING; @@ -1272,12 +1295,11 @@ qla1280_done(struct scsi_qla_host *ha) scsi_dma_unmap(cmd); /* Call the mid-level driver interrupt handler */ - CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE; ha->actthreads--; - (*(cmd)->scsi_done)(cmd); - - if(sp->wait != NULL) + if (sp->wait == NULL) + (*(cmd)->scsi_done)(cmd); + else complete(sp->wait); } LEAVE("qla1280_done"); @@ -3415,6 +3437,7 @@ qla1280_isr(struct scsi_qla_host *ha, struct list_head *done_q) /* Save ISP completion status */ CMD_RESULT(sp->cmd) = 0; + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; /* Place block on done queue */ list_add_tail(&sp->list, done_q); @@ -3681,6 +3704,8 @@ qla1280_status_entry(struct scsi_qla_host *ha, struct response *pkt, } } + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; + /* Place command on done queue. */ list_add_tail(&sp->list, done_q); out: @@ -3736,6 +3761,8 @@ qla1280_error_entry(struct scsi_qla_host *ha, struct response *pkt, CMD_RESULT(sp->cmd) = DID_ERROR << 16; } + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; + /* Place command on done queue. */ list_add_tail(&sp->list, done_q); } @@ -3786,19 +3813,16 @@ qla1280_abort_isp(struct scsi_qla_host *ha) struct scsi_cmnd *cmd; sp = ha->outstanding_cmds[cnt]; if (sp) { - cmd = sp->cmd; CMD_RESULT(cmd) = DID_RESET << 16; - - sp->cmd = NULL; + CMD_HANDLE(cmd) = COMPLETED_HANDLE; ha->outstanding_cmds[cnt] = NULL; - - (*cmd->scsi_done)(cmd); - - sp->flags = 0; + list_add_tail(&sp->list, &ha->done_q); } } + qla1280_done(ha); + status = qla1280_load_firmware(ha); if (status) goto out; diff --git a/drivers/scsi/qla1280.h b/drivers/scsi/qla1280.h index d7c44b8d2b4..834884b9eed 100644 --- a/drivers/scsi/qla1280.h +++ b/drivers/scsi/qla1280.h @@ -88,7 +88,8 @@ /* Maximum outstanding commands in ISP queues */ #define MAX_OUTSTANDING_COMMANDS 512 -#define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS + 2) +#define COMPLETED_HANDLE ((unsigned char *) \ + (MAX_OUTSTANDING_COMMANDS + 2)) /* ISP request and response entry counts (37-65535) */ #define REQUEST_ENTRY_CNT 255 /* Number of request entries. */