* Re: Infinite retries
2008-10-19 10:47 ` Boaz Harrosh
@ 2008-10-19 15:28 ` Alan Stern
2008-10-19 17:07 ` Boaz Harrosh
0 siblings, 1 reply; 4+ messages in thread
From: Alan Stern @ 2008-10-19 15:28 UTC (permalink / raw)
To: Boaz Harrosh; +Cc: James Bottomley, SCSI development list, USB Storage list
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1194 bytes --]
On Sun, 19 Oct 2008, Boaz Harrosh wrote:
> Alan Stern wrote:
> > We do have a problem with infinite retry loops. I'm not sure which
> > kernels are affected, but there's a good chance 2.6.27 is and an
> > excellent chance that 2.6.28-rc1 will be.
...
> Do you have the scsi_io_completion patchset on a public git somewhere?
> I would like to re-test them and review them again.
They aren't in any git repositories, so I am including the two patches
as attachments to this message. The first patch changes the failure
analysis logic in scsi_io_completion() along the lines suggested by
James, and the second gets rid of scsi_end_request(). They are based
roughly on 2.6.27, so they might not apply cleanly after the merge
window.
Neither patch addresses the infinite-retry problem; I wanted to keep
the issues separate.
> Did you try them with above problem and do they solve the issue?
At this point I can't remember exactly which combinations I tried! :-)
However I don't think these patches will have any effect on the retry
loop.
> Also have you looked farther into the retries/timeout issues from
> block layer?
Not yet. I'm waiting for 2.6.28-rc1 to appear.
Alan Stern
[-- Attachment #2: Type: TEXT/plain, Size: 4963 bytes --]
Index: usb-2.6/drivers/scsi/scsi_lib.c
===================================================================
--- usb-2.6.orig/drivers/scsi/scsi_lib.c
+++ usb-2.6/drivers/scsi/scsi_lib.c
@@ -859,6 +859,8 @@ void scsi_io_completion(struct scsi_cmnd
struct scsi_sense_hdr sshdr;
int sense_valid = 0;
int sense_deferred = 0;
+ enum {ACTION_FAIL, ACTION_REQUEUE, ACTION_RETRY,
+ ACTION_DELAYED_RETRY} action;
if (result) {
sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
@@ -909,11 +911,15 @@ void scsi_io_completion(struct scsi_cmnd
if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
return;
this_count = blk_rq_bytes(req);
+ action = ACTION_FAIL;
- /* good_bytes = 0, or (inclusive) there were leftovers and
- * result = 0, so scsi_end_request couldn't retry.
- */
- if (sense_valid && !sense_deferred) {
+ if (host_byte(result) == DID_RESET) {
+ /* Third party bus reset or reset for error recovery
+ * reasons. Just retry the command and see what
+ * happens.
+ */
+ action = ACTION_RETRY;
+ } else if (sense_valid && !sense_deferred) {
switch (sshdr.sense_key) {
case UNIT_ATTENTION:
if (cmd->device->removable) {
@@ -921,16 +927,13 @@ void scsi_io_completion(struct scsi_cmnd
* and quietly refuse further access.
*/
cmd->device->changed = 1;
- scsi_end_request(cmd, -EIO, this_count, 1);
- return;
} else {
/* Must have been a power glitch, or a
* bus reset. Could not have been a
* media change, so we just retry the
- * request and see what happens.
+ * command and see what happens.
*/
- scsi_requeue_command(q, cmd);
- return;
+ action = ACTION_RETRY;
}
break;
case ILLEGAL_REQUEST:
@@ -946,22 +949,13 @@ void scsi_io_completion(struct scsi_cmnd
sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
(cmd->cmnd[0] == READ_10 ||
cmd->cmnd[0] == WRITE_10)) {
+ /* This will issue a new 6-byte command. */
cmd->device->use_10_for_rw = 0;
- /* This will cause a retry with a
- * 6-byte command.
- */
- scsi_requeue_command(q, cmd);
- } else if (sshdr.asc == 0x10) /* DIX */
- scsi_end_request(cmd, -EIO, this_count, 0);
- else
- scsi_end_request(cmd, -EIO, this_count, 1);
- return;
- case ABORTED_COMMAND:
- if (sshdr.asc == 0x10) { /* DIF */
- scsi_end_request(cmd, -EIO, this_count, 0);
- return;
+ action = ACTION_REQUEUE;
}
break;
+ case ABORTED_COMMAND:
+ break;
case NOT_READY:
/* If the device is in the process of becoming
* ready, or has a temporary blockage, retry.
@@ -975,19 +969,16 @@ void scsi_io_completion(struct scsi_cmnd
case 0x07: /* operation in progress */
case 0x08: /* Long write in progress */
case 0x09: /* self test in progress */
- scsi_requeue_command(q, cmd);
- return;
- default:
+ action = ACTION_DELAYED_RETRY;
break;
}
}
- if (!(req->cmd_flags & REQ_QUIET))
+ if (!(req->cmd_flags & REQ_QUIET) &&
+ action == ACTION_FAIL)
scsi_cmd_print_sense_hdr(cmd,
"Device not ready",
&sshdr);
-
- scsi_end_request(cmd, -EIO, this_count, 1);
- return;
+ break;
case VOLUME_OVERFLOW:
if (!(req->cmd_flags & REQ_QUIET)) {
scmd_printk(KERN_INFO, cmd,
@@ -996,28 +987,38 @@ void scsi_io_completion(struct scsi_cmnd
scsi_print_sense("", cmd);
}
/* See SSC3rXX or current. */
- scsi_end_request(cmd, -EIO, this_count, 1);
- return;
- default:
break;
}
}
- if (host_byte(result) == DID_RESET) {
- /* Third party bus reset or reset for error recovery
- * reasons. Just retry the request and see what
- * happens.
+
+ switch (action) {
+ case ACTION_FAIL:
+ /* Give up and fail the remainder of the request */
+ if (result) {
+ if (!(req->cmd_flags & REQ_QUIET)) {
+ scsi_print_result(cmd);
+ if (driver_byte(result) & DRIVER_SENSE)
+ scsi_print_sense("", cmd);
+ }
+ }
+ blk_end_request(req, -EIO, blk_rq_bytes(req));
+ scsi_next_command(cmd);
+ break;
+ case ACTION_REQUEUE:
+ /* Put the request back at the head of the queue.
+ * A new command will be prepared and issued.
*/
scsi_requeue_command(q, cmd);
- return;
- }
- if (result) {
- if (!(req->cmd_flags & REQ_QUIET)) {
- scsi_print_result(cmd);
- if (driver_byte(result) & DRIVER_SENSE)
- scsi_print_sense("", cmd);
- }
+ break;
+ case ACTION_RETRY:
+ /* Retry the same command immediately */
+ scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
+ break;
+ case ACTION_DELAYED_RETRY:
+ /* Retry the same command after a delay */
+ scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
+ break;
}
- scsi_end_request(cmd, -EIO, this_count, !result);
}
static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
[-- Attachment #3: Type: TEXT/plain, Size: 3974 bytes --]
Index: usb-2.6/drivers/scsi/scsi_lib.c
===================================================================
--- usb-2.6.orig/drivers/scsi/scsi_lib.c
+++ usb-2.6/drivers/scsi/scsi_lib.c
@@ -642,69 +642,6 @@ void scsi_run_host_queues(struct Scsi_Ho
scsi_run_queue(sdev->request_queue);
}
-/*
- * Function: scsi_end_request()
- *
- * Purpose: Post-processing of completed commands (usually invoked at end
- * of upper level post-processing and scsi_io_completion).
- *
- * Arguments: cmd - command that is complete.
- * error - 0 if I/O indicates success, < 0 for I/O error.
- * bytes - number of bytes of completed I/O
- * requeue - indicates whether we should requeue leftovers.
- *
- * Lock status: Assumed that lock is not held upon entry.
- *
- * Returns: cmd if requeue required, NULL otherwise.
- *
- * Notes: This is called for block device requests in order to
- * mark some number of sectors as complete.
- *
- * We are guaranteeing that the request queue will be goosed
- * at some point during this call.
- * Notes: If cmd was requeued, upon return it will be a stale pointer.
- */
-static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
- int bytes, int requeue)
-{
- struct request_queue *q = cmd->device->request_queue;
- struct request *req = cmd->request;
-
- /*
- * If there are blocks left over at the end, set up the command
- * to queue the remainder of them.
- */
- if (blk_end_request(req, error, bytes)) {
- int leftover = (req->hard_nr_sectors << 9);
-
- if (blk_pc_request(req))
- leftover = req->data_len;
-
- /* kill remainder if no retrys */
- if (error && blk_noretry_request(req))
- blk_end_request(req, error, leftover);
- else {
- if (requeue) {
- /*
- * Bleah. Leftovers again. Stick the
- * leftovers in the front of the
- * queue, and goose the queue again.
- */
- scsi_requeue_command(q, cmd);
- cmd = NULL;
- }
- return cmd;
- }
- }
-
- /*
- * This will goose the queue request function at the end, so we don't
- * need to worry about launching another command.
- */
- scsi_next_command(cmd);
- return NULL;
-}
-
static inline unsigned int scsi_sgtable_index(unsigned short nents)
{
unsigned int index;
@@ -852,7 +789,6 @@ static void scsi_end_bidi_request(struct
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
{
int result = cmd->result;
- int this_count;
struct request_queue *q = cmd->device->request_queue;
struct request *req = cmd->request;
int error = 0;
@@ -903,22 +839,28 @@ void scsi_io_completion(struct scsi_cmnd
SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
"%d bytes done.\n",
req->nr_sectors, good_bytes));
-
- /* A number of bytes were successfully read. If there
- * are leftovers and there is some kind of error
- * (result != 0), retry the rest.
- */
- if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
+ if (blk_end_request(req, error, good_bytes) == 0) {
+ /* This request is completely finished; start the next one */
+ scsi_next_command(cmd);
return;
- this_count = blk_rq_bytes(req);
+ }
+
+ /* The request isn't finished yet. Figure out what to do next. */
action = ACTION_FAIL;
+ if (result == 0) {
+ /* No error, so carry out the remainder of the request. */
+ action = ACTION_REQUEUE;
- if (host_byte(result) == DID_RESET) {
+ } else if (error && blk_noretry_request(req)) {
+ /* Retrys are disallowed, so kill the remainder. */
+
+ } else if (host_byte(result) == DID_RESET) {
/* Third party bus reset or reset for error recovery
* reasons. Just retry the command and see what
* happens.
*/
action = ACTION_RETRY;
+
} else if (sense_valid && !sense_deferred) {
switch (sshdr.sense_key) {
case UNIT_ATTENTION:
^ permalink raw reply [flat|nested] 4+ messages in thread