From: Mike Anderson <andmike@us.ibm.com>
To: linux-scsi@vger.kernel.org
Subject: Re: [PATCH / RFC] scsi_error handler update. (3/4)
Date: Tue, 11 Feb 2003 00:17:45 -0800 [thread overview]
Message-ID: <20030211081744.GC1368@beaverton.ibm.com> (raw)
In-Reply-To: <20030211081536.GB1368@beaverton.ibm.com>
This patch series is against scsi-misc-2.5.
02_serror-hndlr-1.diff:
- Change to using eh_cmd_list.
- Change scsi_unjam_host to get sense, abort cmds, ready
devices, and then either retry or finish each cmd.
- Move retries outside of eh.
-andmike
--
Michael Anderson
andmike@us.ibm.com
scsi_error.c | 477 +++++++++++++++++++++++++++++------------------------------
1 files changed, 241 insertions(+), 236 deletions(-)
------
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi_error.c Mon Feb 10 22:25:47 2003
@@ -211,36 +211,36 @@
* @sc_list: List for failed cmds.
* @shost: scsi host being recovered.
**/
-static void scsi_eh_prt_fail_stats(Scsi_Cmnd *sc_list, struct Scsi_Host *shost)
+static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost)
{
- Scsi_Cmnd *scmd;
- Scsi_Device *sdev;
+ struct scsi_cmnd *scmd;
+ struct scsi_device *sdev;
int total_failures = 0;
int cmd_failed = 0;
- int cmd_timed_out = 0;
+ int cmd_cancel = 0;
int devices_failed = 0;
list_for_each_entry(sdev, &shost->my_devices, siblings) {
- for (scmd = sc_list; scmd; scmd = scmd->bh_next) {
+ list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
if (scmd->device == sdev) {
++total_failures;
if (scsi_eh_eflags_chk(scmd,
- SCSI_EH_CMD_TIMEOUT))
- ++cmd_timed_out;
- else
+ SCSI_EH_CANCEL_CMD))
+ ++cmd_cancel;
+ else
++cmd_failed;
}
}
- if (cmd_timed_out || cmd_failed) {
+ if (cmd_cancel || cmd_failed) {
SCSI_LOG_ERROR_RECOVERY(3,
printk("%s: %d:%d:%d:%d cmds failed: %d,"
- " timedout: %d\n",
+ " cancel: %d\n",
__FUNCTION__, shost->host_no,
sdev->channel, sdev->id, sdev->lun,
- cmd_failed, cmd_timed_out));
- cmd_timed_out = 0;
+ cmd_failed, cmd_cancel));
+ cmd_cancel = 0;
cmd_failed = 0;
++devices_failed;
}
@@ -253,68 +253,6 @@
#endif
/**
- * scsi_eh_get_failed - Gather failed cmds.
- * @sc_list: A pointer to a list for failed cmds.
- * @shost: Scsi host being recovered.
- *
- * XXX Add opaque interator for device / shost. Investigate direct
- * addition to per eh list on error allowing skipping of this step.
- **/
-static void scsi_eh_get_failed(Scsi_Cmnd **sc_list, struct Scsi_Host *shost)
-{
- int found;
- Scsi_Device *sdev;
- Scsi_Cmnd *scmd;
-
- found = 0;
- list_for_each_entry(sdev, &shost->my_devices, siblings) {
- unsigned long flags;
-
- spin_lock_irqsave(&sdev->list_lock, flags);
- list_for_each_entry(scmd, &sdev->cmd_list, list) {
- if (scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
- scmd->bh_next = *sc_list;
- *sc_list = scmd;
- found++;
- } else {
- /*
- * FIXME Verify how this can happen and if
- * this is still needed??
- */
- if (scmd->state != SCSI_STATE_INITIALIZING
- && scmd->state != SCSI_STATE_UNUSED) {
- /*
- * Rats. Something is still floating
- * around out there This could be the
- * result of the fact that the upper level
- * drivers are still frobbing commands
- * that might have succeeded. There are
- * two outcomes. One is that the command
- * block will eventually be freed, and the
- * other one is that the command will be
- * queued and will be finished along the
- * way.
- */
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr"
- " prematurely woken"
- " cmds still active"
- " (%p %x %d)\n",
- scmd, scmd->state,
- scmd->device->id));
- }
- }
- }
- spin_unlock_irqrestore(&sdev->list_lock, flags);
- }
-
- SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost));
-
- if (shost->host_failed != found)
- printk(KERN_ERR "%s: host_failed: %d != found: %d\n",
- __FUNCTION__, shost->host_failed, found);
-}
-
-/**
* scsi_check_sense - Examine scsi cmd sense
* @scmd: Cmd to have sense checked.
*
@@ -570,7 +508,8 @@
spin_lock_irqsave(scmd->device->host->host_lock, flags);
if (scmd->device->host->hostt->eh_abort_handler)
scmd->device->host->hostt->eh_abort_handler(scmd);
- spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
+ spin_unlock_irqrestore(scmd->device->host->host_lock,
+ flags);
scmd->request->rq_status = RQ_SCSI_DONE;
scmd->owner = SCSI_OWNER_ERROR_HANDLER;
@@ -712,6 +651,7 @@
* scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
* @scmd: Original SCSI cmd that eh has finished.
* @shost: SCSI host that cmd originally failed on.
+ * @done_list: list_head for processed commands.
*
* Notes:
* We don't want to use the normal command completion while we are are
@@ -720,7 +660,8 @@
* keep a list of pending commands for final completion, and once we
* are ready to leave error handling we handle completion for real.
**/
-static void scsi_eh_finish_cmd(Scsi_Cmnd *scmd, struct Scsi_Host *shost)
+static void scsi_eh_finish_cmd(Scsi_Cmnd *scmd, struct Scsi_Host *shost,
+ struct list_head *done_list )
{
shost->host_failed--;
scmd->state = SCSI_STATE_BHQUEUE;
@@ -731,12 +672,14 @@
* things.
*/
scsi_setup_cmd_retry(scmd);
+
+ list_move_tail(&scmd->eh_list, done_list);
}
/**
* scsi_eh_get_sense - Get device sense data.
- * @sc_todo: list of cmds that have failed.
* @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
*
* Description:
* See if we need to request sense information. if so, then get it
@@ -754,23 +697,23 @@
*
* In 2.5 this capability will be going away.
**/
-static int scsi_eh_get_sense(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_get_sense(struct Scsi_Host *shost,
+ struct list_head *done_list)
{
int rtn;
+ struct list_head *lh, *lh_sf;
Scsi_Cmnd *scmd;
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
- " to request sense\n",
- __FUNCTION__));
-
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
- if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_FAILED) ||
+ list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+ if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD) ||
SCSI_SENSE_VALID(scmd))
continue;
SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
- " for tgt: %d\n",
- __FUNCTION__, scmd->device->id));
+ " for id: %d\n",
+ current->comm,
+ scmd->device->id));
rtn = scsi_request_sense(scmd);
if (rtn != SUCCESS)
continue;
@@ -787,7 +730,7 @@
* upper level.
*/
if (rtn == SUCCESS)
- scsi_eh_finish_cmd(scmd, shost);
+ scsi_eh_finish_cmd(scmd, shost, done_list);
if (rtn != NEEDS_RETRY)
continue;
@@ -806,10 +749,10 @@
/*
* we eventually hand this one back to the top level.
*/
- scsi_eh_finish_cmd(scmd, shost);
+ scsi_eh_finish_cmd(scmd, shost, done_list);
}
- return shost->host_failed;
+ return list_empty(&shost->eh_cmd_list);
}
/**
@@ -899,9 +842,9 @@
}
/**
- * scsi_eh_abort_cmd - abort a timed-out cmd.
- * @sc_todo: A list of cmds that have failed.
+ * scsi_eh_abort_cmds - abort canceled commands.
* @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
*
* Decription:
* Try and see whether or not it makes sense to try and abort the
@@ -910,29 +853,36 @@
* no sense to try and abort the command, since as far as the shost
* adapter is concerned, it isn't running.
**/
-static int scsi_eh_abort_cmd(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_abort_cmds(struct Scsi_Host *shost,
+ struct list_head *done_list)
{
-
int rtn;
- Scsi_Cmnd *scmd;
+ struct list_head *lh, *lh_sf;
+ struct scsi_cmnd *scmd;
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
- " to abort cmd\n", __FUNCTION__));
-
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
- if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_TIMEOUT))
+ list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+ if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
continue;
-
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
+ "0x%p\n", current->comm,
+ scmd));
rtn = scsi_try_to_abort_cmd(scmd);
if (rtn == SUCCESS) {
- if (!scsi_eh_tur(scmd)) {
- rtn = scsi_eh_retry_cmd(scmd);
- if (rtn == SUCCESS)
- scsi_eh_finish_cmd(scmd, shost);
+ scsi_eh_eflags_clr(scmd, SCSI_EH_CANCEL_CMD);
+ if (!scmd->device->online || !scsi_eh_tur(scmd)) {
+ scsi_eh_finish_cmd(scmd, shost, done_list);
}
- }
+
+ } else
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
+ " cmd failed:"
+ "0x%p\n",
+ current->comm,
+ scmd));
}
- return shost->host_failed;
+
+ return list_empty(&shost->eh_cmd_list);
}
/**
@@ -968,9 +918,9 @@
}
/**
- * scsi_eh_bus_device_reset - send bdr is needed
- * @sc_todo: a list of cmds that have failed.
+ * scsi_eh_bus_device_reset - send bdr if needed
* @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
*
* Notes:
* Try a bus device reset. still, look to see whether we have multiple
@@ -978,39 +928,52 @@
* makes no sense to try bus_device_reset - we really would need to try
* a bus_reset instead.
**/
-static int scsi_eh_bus_device_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
+ struct list_head *done_list)
{
int rtn;
- Scsi_Cmnd *scmd;
- Scsi_Device *sdev;
-
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Trying BDR\n", __FUNCTION__));
+ struct list_head *lh, *lh_sf;
+ struct scsi_cmnd *scmd, *bdr_scmd;
+ struct scsi_device *sdev;
list_for_each_entry(sdev, &shost->my_devices, siblings) {
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
- if ((scmd->device == sdev) &&
- scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
+ bdr_scmd = NULL;
+ list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list)
+ if (scmd->device == sdev) {
+ bdr_scmd = scmd;
break;
+ }
- if (!scmd)
+ if (!bdr_scmd)
continue;
- /*
- * ok, we have a device that is having problems. try and send
- * a bus device reset to it.
- */
- rtn = scsi_try_bus_device_reset(scmd);
- if ((rtn == SUCCESS) && (!scsi_eh_tur(scmd)))
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
- if ((scmd->device == sdev) &&
- scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
- rtn = scsi_eh_retry_cmd(scmd);
- if (rtn == SUCCESS)
- scsi_eh_finish_cmd(scmd, shost);
- }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
+ " 0x%p\n", current->comm,
+ sdev));
+ rtn = scsi_try_bus_device_reset(bdr_scmd);
+ if (rtn == SUCCESS) {
+ if (!sdev->online || !scsi_eh_tur(bdr_scmd)) {
+ list_for_each_safe(lh, lh_sf,
+ &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct
+ scsi_cmnd,
+ eh_list);
+ if (scmd->device == sdev)
+ scsi_eh_finish_cmd(scmd,
+ shost,
+ done_list);
+ }
+ }
+ } else {
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
+ " failed sdev:"
+ "0x%p\n",
+ current->comm,
+ sdev));
+ }
}
- return shost->host_failed;
+ return list_empty(&shost->eh_cmd_list);
}
/**
@@ -1040,7 +1003,8 @@
/*
* Mark all affected devices to expect a unit attention.
*/
- list_for_each_entry(sdev, &scmd->device->host->my_devices, siblings)
+ list_for_each_entry(sdev, &scmd->device->host->my_devices,
+ siblings)
if (scmd->device->channel == sdev->channel) {
sdev->was_reset = 1;
sdev->expecting_cc_ua = 1;
@@ -1076,7 +1040,8 @@
/*
* Mark all affected devices to expect a unit attention.
*/
- list_for_each_entry(sdev, &scmd->device->host->my_devices, siblings)
+ list_for_each_entry(sdev, &scmd->device->host->my_devices,
+ siblings)
if (scmd->device->channel == sdev->channel) {
sdev->was_reset = 1;
sdev->expecting_cc_ua = 1;
@@ -1086,26 +1051,20 @@
}
/**
- * scsi_eh_bus_host_reset - send a bus reset and on failure try host reset
- * @sc_todo: a list of cmds that have failed.
+ * scsi_eh_bus_reset - send a bus reset
* @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
**/
-static int scsi_eh_bus_host_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_bus_reset(struct Scsi_Host *shost,
+ struct list_head *done_list)
{
int rtn;
+ struct list_head *lh, *lh_sf;
Scsi_Cmnd *scmd;
Scsi_Cmnd *chan_scmd;
unsigned int channel;
/*
- * if we ended up here, we have serious problems. the only thing left
- * to try is a full bus reset. if someone has grabbed the bus and isn't
- * letting go, then perhaps this will help.
- */
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Try Bus/Host RST\n",
- __FUNCTION__));
-
- /*
* we really want to loop over the various channels, and do this on
* a channel by channel basis. we should also check to see if any
* of the failed commands are on soft_reset devices, and if so, skip
@@ -1113,9 +1072,8 @@
*/
for (channel = 0; channel <= shost->max_channel; channel++) {
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
- if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
- continue;
+ chan_scmd = NULL;
+ list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
if (channel == scmd->device->channel) {
chan_scmd = scmd;
break;
@@ -1126,63 +1084,97 @@
}
}
- if (!scmd)
+ if (!chan_scmd)
continue;
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
+ " %d\n", current->comm,
+ channel));
+ rtn = scsi_try_bus_reset(chan_scmd);
+ if (rtn == SUCCESS) {
+ list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct scsi_cmnd,
+ eh_list);
+ if (channel == scmd->device->channel)
+ if (!scmd->device->online ||
+ !scsi_eh_tur(scmd))
+ scsi_eh_finish_cmd(scmd,
+ shost,
+ done_list);
+ }
+ } else {
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
+ " failed chan: %d\n",
+ current->comm,
+ channel));
+ }
+ }
+ return list_empty(&shost->eh_cmd_list);
+}
- /*
- * we now know that we are able to perform a reset for the
- * channel that scmd points to.
- */
- rtn = scsi_try_bus_reset(scmd);
- if (rtn != SUCCESS)
- rtn = scsi_try_host_reset(scmd);
+/**
+ * scsi_eh_host_reset - send a host reset
+ * @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
+ **/
+static int scsi_eh_host_reset(struct Scsi_Host *shost,
+ struct list_head *done_list)
+{
+ int rtn;
+ struct list_head *lh, *lh_sf;
+ Scsi_Cmnd *scmd;
- if (rtn == SUCCESS) {
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
- if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)
- || channel != scmd->device->channel)
- continue;
- if (!scsi_eh_tur(scmd)) {
- rtn = scsi_eh_retry_cmd(scmd);
+ if (!list_empty(&shost->eh_cmd_list)) {
+ scmd = list_entry(shost->eh_cmd_list.next,
+ struct scsi_cmnd, eh_list);
- if (rtn == SUCCESS)
- scsi_eh_finish_cmd(scmd, shost);
- }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
+ , current->comm));
+
+ rtn = scsi_try_host_reset(scmd);
+ if (rtn == SUCCESS) {
+ list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+ if (!scmd->device->online || !scsi_eh_tur(scmd))
+ scsi_eh_finish_cmd(scmd, shost,
+ done_list);
}
+ } else {
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
+ " failed\n",
+ current->comm));
}
-
}
- return shost->host_failed;
+ return list_empty(&shost->eh_cmd_list);
}
/**
* scsi_eh_offline_sdevs - offline scsi devices that fail to recover
- * @sc_todo: a list of cmds that have failed.
* @shost: scsi host being recovered.
+ * @done_list: list_head for processed commands.
*
**/
-static void scsi_eh_offline_sdevs(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static void scsi_eh_offline_sdevs(struct Scsi_Host *shost,
+ struct list_head *done_list)
{
+ struct list_head *lh, *lh_sf;
Scsi_Cmnd *scmd;
- for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
- if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
- continue;
-
+ list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+ scmd = list_entry(lh, struct scsi_cmnd, eh_list);
printk(KERN_INFO "scsi: Device offlined - not"
- " ready or command retry failed"
- " after error recovery: host"
+ " ready after error recovery: host"
" %d channel %d id %d lun %d\n",
shost->host_no,
scmd->device->channel,
scmd->device->id,
scmd->device->lun);
-
- if (scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_TIMEOUT))
- scmd->result |= (DRIVER_TIMEOUT << 24);
-
- scmd->device->online = 0;
- scsi_eh_finish_cmd(scmd, shost);
+ scmd->device->online = FALSE;
+ if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
+ /*
+ * FIXME: Handle lost cmds.
+ */
+ }
+ scsi_eh_finish_cmd(scmd, shost, done_list);
}
return;
}
@@ -1477,6 +1469,8 @@
ASSERT_LOCK(shost->host_lock, 0);
+ shost->in_recovery = 0;
+
/*
* If the door was locked, we need to insert a door lock request
* onto the head of the SCSI request queue for the device. There
@@ -1517,6 +1511,56 @@
}
/**
+ * scsi_eh_ready_devs - check device ready state and recover if not.
+ * @shost: host to be recovered.
+ * @done_list: list_head for processed commands.
+ *
+ **/
+static void scsi_eh_ready_devs(struct Scsi_Host *shost,
+ struct list_head *done_list)
+{
+ if (scsi_eh_bus_device_reset(shost, done_list))
+ if (scsi_eh_bus_reset(shost, done_list))
+ if (scsi_eh_host_reset(shost, done_list))
+ scsi_eh_offline_sdevs(shost, done_list);
+}
+
+/**
+ * scsi_eh_flush_done_list - finish processed commands or retry them.
+ * @shost: host to be recovered.
+ * @done_list: list_head of processed commands.
+ *
+ **/
+static void scsi_eh_flush_done_list(struct Scsi_Host *shost,
+ struct list_head *done_list)
+{
+ struct list_head *lh, *lh_sf;
+ Scsi_Cmnd *scmd;
+
+ list_for_each_safe(lh, lh_sf, done_list) {
+ scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+ list_del_init(lh);
+ if (!scmd->device->online) {
+ scmd->result |= (DRIVER_TIMEOUT << 24);
+ } else {
+ if (++scmd->retries < scmd->allowed) {
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("%s: flush retry"
+ " cmd: %p\n",
+ current->comm,
+ scmd));
+ scsi_retry_command(scmd);
+ continue;
+ }
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
+ " cmd: %p\n",
+ current->comm, scmd));
+ scsi_finish_command(scmd);
+ }
+}
+
+/**
* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
* @shost: Host to unjam.
*
@@ -1541,60 +1585,15 @@
**/
static void scsi_unjam_host(struct Scsi_Host *shost)
{
- Scsi_Cmnd *sc_todo = NULL;
- Scsi_Cmnd *scmd;
-
- /*
- * Is this assert really ok anymore (andmike). Should we at least
- * be using spin_lock_unlocked.
- */
- ASSERT_LOCK(shost->host_lock, 0);
-
- scsi_eh_get_failed(&sc_todo, shost);
-
- if (scsi_eh_get_sense(sc_todo, shost))
- if (scsi_eh_abort_cmd(sc_todo, shost))
- if (scsi_eh_bus_device_reset(sc_todo, shost))
- if (scsi_eh_bus_host_reset(sc_todo, shost))
- scsi_eh_offline_sdevs(sc_todo, shost);
+ LIST_HEAD(done_list);
- BUG_ON(shost->host_failed);
+ SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost));
+ if (!scsi_eh_get_sense(shost, &done_list))
+ if (!scsi_eh_abort_cmds(shost, &done_list))
+ scsi_eh_ready_devs(shost, &done_list);
- /*
- * We are currently holding these things in a linked list - we
- * didn't put them in the bottom half queue because we wanted to
- * keep things quiet while we were working on recovery, and
- * passing them up to the top level could easily cause the top
- * level to try and queue something else again.
- *
- * start by marking that the host is no longer in error recovery.
- */
- shost->in_recovery = 0;
-
- /*
- * take the list of commands, and stick them in the bottom half queue.
- * the current implementation of scsi_done will do this for us - if need
- * be we can create a special version of this function to do the
- * same job for us.
- */
- for (scmd = sc_todo; scmd; scmd = sc_todo) {
- sc_todo = scmd->bh_next;
- scmd->bh_next = NULL;
- /*
- * Oh, this is a vile hack. scsi_done() expects a timer
- * to be running on the command. If there isn't, it assumes
- * that the command has actually timed out, and a timer
- * handler is running. That may well be how we got into
- * this fix, but right now things are stable. We add
- * a timer back again so that we can report completion.
- * scsi_done() will immediately remove said timer from
- * the command, and then process it.
- */
- scsi_add_timer(scmd, 100, scsi_eh_times_out);
- scsi_done(scmd);
- }
-
+ scsi_eh_flush_done_list(shost, &done_list);
}
/**
@@ -1642,7 +1641,8 @@
/*
* Wake up the thread that created us.
*/
- SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of scsi_eh_%d\n",shost->host_no));
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
+ " scsi_eh_%d\n",shost->host_no));
complete(shost->eh_notify);
@@ -1652,7 +1652,9 @@
* away and die. This typically happens if the user is
* trying to unload a module.
*/
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d sleeping\n",shost->host_no));
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
+ " scsi_eh_%d"
+ " sleeping\n",shost->host_no));
/*
* Note - we always use down_interruptible with the semaphore
@@ -1667,7 +1669,9 @@
if (shost->eh_kill)
break;
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d waking up\n",shost->host_no));
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
+ " scsi_eh_%d waking"
+ " up\n",shost->host_no));
shost->eh_active = 1;
@@ -1695,7 +1699,8 @@
}
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d exiting\n",shost->host_no));
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
+ " exiting\n",shost->host_no));
/*
* Make sure that nobody tries to wake us up again.
next prev parent reply other threads:[~2003-02-11 8:16 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-02-11 8:13 [PATCH / RFC] scsi_error handler update. (1/4) Mike Anderson
2003-02-11 8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
2003-02-11 8:17 ` Mike Anderson [this message]
2003-02-11 8:19 ` [PATCH / RFC] scsi_error handler update. (4/4) Mike Anderson
2003-02-11 22:38 ` [PATCH / RFC] scsi_error handler update. (3/4) James Bottomley
2003-02-12 7:16 ` Mike Anderson
2003-02-12 14:26 ` Luben Tuikov
2003-02-12 14:37 ` James Bottomley
2003-02-12 22:34 ` James Bottomley
2003-02-13 8:24 ` Mike Anderson
2003-02-11 16:49 ` [PATCH / RFC] scsi_error handler update. (1/4) Luben Tuikov
2003-02-11 17:22 ` Mike Anderson
2003-02-11 19:05 ` Luben Tuikov
2003-02-11 20:14 ` Luben Tuikov
2003-02-11 21:14 ` Mike Anderson
[not found] ` <3E495862.3050709@splentec.com>
2003-02-11 21:20 ` Mike Anderson
2003-02-11 21:22 ` Luben Tuikov
2003-02-11 22:41 ` Christoph Hellwig
2003-02-12 20:10 ` Luben Tuikov
2003-02-12 20:46 ` Christoph Hellwig
2003-02-12 21:23 ` Mike Anderson
2003-02-12 22:15 ` Luben Tuikov
2003-02-12 21:46 ` Luben Tuikov
2003-02-13 15:47 ` Christoph Hellwig
2003-02-13 18:55 ` Luben Tuikov
2003-02-14 0:24 ` Doug Ledford
2003-02-14 16:38 ` Patrick Mansfield
2003-02-14 16:58 ` Mike Anderson
2003-02-14 18:50 ` Doug Ledford
2003-02-14 19:35 ` Luben Tuikov
2003-02-14 21:20 ` James Bottomley
2003-02-17 17:20 ` Luben Tuikov
2003-02-17 17:58 ` James Bottomley
2003-02-17 18:29 ` Luben Tuikov
2003-02-18 5:37 ` Andre Hedrick
2003-02-18 19:46 ` Luben Tuikov
2003-02-18 22:16 ` Andre Hedrick
2003-02-18 23:35 ` Luben Tuikov
2003-02-17 20:17 ` Doug Ledford
2003-02-17 20:19 ` Matthew Jacob
2003-02-17 21:12 ` Luben Tuikov
2003-02-17 17:35 ` Luben Tuikov
2003-02-14 21:27 ` James Bottomley
2003-02-17 17:28 ` Luben Tuikov
2003-02-16 4:23 ` Andre Hedrick
2003-02-11 18:00 ` Patrick Mansfield
2003-02-11 18:44 ` Mike Anderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030211081744.GC1368@beaverton.ibm.com \
--to=andmike@us.ibm.com \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.