* [PATCH] ATAPI error handling work
@ 2005-10-05 20:51 Jeff Garzik
2005-10-06 5:35 ` Tejun Heo
0 siblings, 1 reply; 5+ messages in thread
From: Jeff Garzik @ 2005-10-05 20:51 UTC (permalink / raw)
To: linux-ide@vger.kernel.org; +Cc: Tejun Heo, Douglas Gilbert
[-- Attachment #1: Type: text/plain, Size: 239 bytes --]
This is a work-in-progress, attempting to fix the death of libata
whenever there is a SCSI error thrown to us... IOW this fixes libata
EH to properly fake autosensing, rather than kicking over to the SCSI EH
thread for that purpose.
[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 7405 bytes --]
drivers/scsi/libata-core.c | 90 ++++++++++-----------------------------------
drivers/scsi/libata-scsi.c | 84 ++++++++++++++++++++++++++++++++++--------
drivers/scsi/libata.h | 0
include/linux/libata.h | 1
4 files changed, 90 insertions(+), 85 deletions(-)
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3015,52 +3015,6 @@ fsm_start:
goto fsm_start;
}
-static void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
- struct scsi_cmnd *cmd)
-{
- DECLARE_COMPLETION(wait);
- struct ata_queued_cmd *qc;
- unsigned long flags;
- int rc;
-
- DPRINTK("ATAPI request sense\n");
-
- qc = ata_qc_new_init(ap, dev);
- BUG_ON(qc == NULL);
-
- /* FIXME: is this needed? */
- memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
-
- ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
- qc->dma_dir = DMA_FROM_DEVICE;
-
- memset(&qc->cdb, 0, ap->cdb_len);
- qc->cdb[0] = REQUEST_SENSE;
- qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
- qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
- qc->tf.command = ATA_CMD_PACKET;
-
- qc->tf.protocol = ATA_PROT_ATAPI;
- qc->tf.lbam = (8 * 1024) & 0xff;
- qc->tf.lbah = (8 * 1024) >> 8;
- qc->nbytes = SCSI_SENSE_BUFFERSIZE;
-
- qc->waiting = &wait;
- qc->complete_fn = ata_qc_complete_noop;
-
- spin_lock_irqsave(&ap->host_set->lock, flags);
- rc = ata_qc_issue(qc);
- spin_unlock_irqrestore(&ap->host_set->lock, flags);
-
- if (rc)
- ata_port_disable(ap);
- else
- wait_for_completion(&wait);
-
- DPRINTK("EXIT\n");
-}
-
/**
* ata_qc_timeout - Handle timeout of queued command
* @qc: Command that timed out
@@ -3084,32 +3038,11 @@ static void ata_qc_timeout(struct ata_qu
{
struct ata_port *ap = qc->ap;
struct ata_host_set *host_set = ap->host_set;
- struct ata_device *dev = qc->dev;
u8 host_stat = 0, drv_stat;
unsigned long flags;
DPRINTK("ENTER\n");
- /* FIXME: doesn't this conflict with timeout handling? */
- if (qc->dev->class == ATA_DEV_ATAPI && qc->scsicmd) {
- struct scsi_cmnd *cmd = qc->scsicmd;
-
- if (!(cmd->eh_eflags & SCSI_EH_CANCEL_CMD)) {
-
- /* finish completing original command */
- spin_lock_irqsave(&host_set->lock, flags);
- __ata_qc_complete(qc);
- spin_unlock_irqrestore(&host_set->lock, flags);
-
- atapi_request_sense(ap, dev, cmd);
-
- cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
- scsi_finish_command(cmd);
-
- goto out;
- }
- }
-
spin_lock_irqsave(&host_set->lock, flags);
/* hack alert! We cannot use the supplied completion
@@ -3148,7 +3081,6 @@ static void ata_qc_timeout(struct ata_qu
spin_unlock_irqrestore(&host_set->lock, flags);
-out:
DPRINTK("EXIT\n");
}
@@ -3174,17 +3106,35 @@ out:
void ata_eng_timeout(struct ata_port *ap)
{
struct ata_queued_cmd *qc;
+ struct ata_host_set *host_set = ap->host_set;
+ unsigned long flags;
+ int printed_message = 0;
DPRINTK("ENTER\n");
+ spin_lock_irqsave(&host_set->lock, flags);
+ while (ap->flags & ATA_FLAG_IN_EH) {
+ spin_unlock_irqrestore(&host_set->lock, flags);
+
+ if (!printed_message++)
+ printk(KERN_INFO "ata%u: waiting in timeout handler for EH\n",
+ ap->id);
+
+ msleep(250);
+
+ spin_lock_irqsave(&host_set->lock, flags);
+ }
+ spin_unlock_irqrestore(&host_set->lock, flags);
+
qc = ata_qc_from_tag(ap, ap->active_tag);
- if (!qc) {
+ if (qc)
+ ata_qc_timeout(qc);
+ else {
printk(KERN_ERR "ata%u: BUG: timeout without command\n",
ap->id);
goto out;
}
- ata_qc_timeout(qc);
out:
DPRINTK("EXIT\n");
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -225,7 +225,7 @@ void ata_to_sense_error(struct ata_queue
};
int i = 0;
- cmd->result = SAM_STAT_CHECK_CONDITION;
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
/*
* Is this an error we can process/parse
@@ -1468,7 +1468,7 @@ unsigned int ata_scsiop_report_luns(stru
void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq)
{
DPRINTK("ENTER\n");
- cmd->result = SAM_STAT_CHECK_CONDITION;
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
cmd->sense_buffer[0] = 0x70;
cmd->sense_buffer[2] = ILLEGAL_REQUEST;
@@ -1479,28 +1479,81 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
done(cmd);
}
+static int atapi_qc_complete_sense(struct ata_queued_cmd *qc, u8 drv_stat)
+{
+ struct scsi_cmnd *cmd = qc->scsicmd;
+
+ if (ata_ok(drv_stat))
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+ else
+ ata_to_sense_error(qc, drv_stat);
+
+ qc->ap->flags &= ~ATA_FLAG_IN_EH;
+ qc->scsidone(cmd);
+ return 0;
+}
+
+static void atapi_request_sense(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ int rc;
+
+ DPRINTK("ATAPI request sense\n");
+
+ /* FIXME: is this needed? */
+ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
+
+ ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
+ qc->dma_dir = DMA_FROM_DEVICE;
+
+ memset(&qc->cdb, 0, ap->cdb_len);
+ qc->cdb[0] = REQUEST_SENSE;
+ qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
+
+ qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+ qc->tf.command = ATA_CMD_PACKET;
+
+ qc->tf.protocol = ATA_PROT_ATAPI;
+ qc->tf.lbam = (8 * 1024) & 0xff;
+ qc->tf.lbah = (8 * 1024) >> 8;
+ qc->nbytes = SCSI_SENSE_BUFFERSIZE;
+
+ qc->complete_fn = atapi_qc_complete_sense;
+
+ rc = ata_qc_issue(qc);
+
+ if (rc) {
+ /* FIXME: complete failing command with an error */
+ ata_port_disable(ap);
+ }
+
+ /* FIXME: add timer for timeout of this command */
+ /* control flow continues in atapi_qc_complete_sense() */
+
+ DPRINTK("EXIT\n");
+}
+
static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
{
+ struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
+ rmb();
+ if (ap->host->eh_active)
+ return 1;
+
if (unlikely(drv_stat & (ATA_BUSY | ATA_DRQ)))
ata_to_sense_error(qc, drv_stat);
+
else if (unlikely(drv_stat & ATA_ERR)) {
DPRINTK("request check condition\n");
-
- /* FIXME: command completion with check condition
- * but no sense causes the error handler to run,
- * which then issues REQUEST SENSE, fills in the sense
- * buffer, and completes the command (for the second
- * time). We need to issue REQUEST SENSE some other
- * way, to avoid completing the command twice.
- */
- cmd->result = SAM_STAT_CHECK_CONDITION;
-
- qc->scsidone(cmd);
-
+ ap->flags |= ATA_FLAG_IN_EH;
+ atapi_request_sense(qc);
return 1;
- } else {
+ }
+
+ else {
u8 *scsicmd = cmd->cmnd;
if (scsicmd[0] == INQUIRY) {
@@ -1535,6 +1588,7 @@ static int atapi_qc_complete(struct ata_
return 0;
}
+
/**
* atapi_xlat - Initialize PACKET taskfile
* @qc: command structure to be initialized
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
diff --git a/include/linux/libata.h b/include/linux/libata.h
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -118,6 +118,7 @@ enum {
ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */
ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once
* proper HSM is in place. */
+ ATA_FLAG_IN_EH = (1 << 10),
ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ATAPI error handling work
2005-10-05 20:51 [PATCH] ATAPI error handling work Jeff Garzik
@ 2005-10-06 5:35 ` Tejun Heo
2005-10-06 10:09 ` Jeff Garzik
0 siblings, 1 reply; 5+ messages in thread
From: Tejun Heo @ 2005-10-06 5:35 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide@vger.kernel.org, Douglas Gilbert
Hello, Jeff.
On Wed, Oct 05, 2005 at 04:51:19PM -0400, Jeff Garzik wrote:
> This is a work-in-progress, attempting to fix the death of libata
> whenever there is a SCSI error thrown to us... IOW this fixes libata
> EH to properly fake autosensing, rather than kicking over to the SCSI EH
> thread for that purpose.
>
Great. I just have a few questions. Oh.. BTW, against which head is
the patch generated? I couldn't get it applied cleanly to either
upstream or ALL.
[- snip -]
> @@ -3174,17 +3106,35 @@ out:
> void ata_eng_timeout(struct ata_port *ap)
> {
> struct ata_queued_cmd *qc;
> + struct ata_host_set *host_set = ap->host_set;
> + unsigned long flags;
> + int printed_message = 0;
>
> DPRINTK("ENTER\n");
>
> + spin_lock_irqsave(&host_set->lock, flags);
> + while (ap->flags & ATA_FLAG_IN_EH) {
> + spin_unlock_irqrestore(&host_set->lock, flags);
> +
> + if (!printed_message++)
> + printk(KERN_INFO "ata%u: waiting in timeout handler for EH\n",
> + ap->id);
> +
> + msleep(250);
> +
> + spin_lock_irqsave(&host_set->lock, flags);
> + }
> + spin_unlock_irqrestore(&host_set->lock, flags);
> +
I'm not really sure if synchronization around reqsense with
ATA_FLAG_IN_EH is such a good idea for the following reasons.
* IMHO, tugging in request sense into the timeout window of the
original command is okay (or even better).
* As long as we grab host_set lock while submitting request sense,
which we already does, the synchronization can be done safely in EH
proper. libata EH isn't currently ready for this but other commands
are dealt the same way and fixing EH is the correct way, IMHO.
Note that we really don't have to do anything special for timing out
during request sense. Once we fix EH/interrupt handler race, it can
be handled the same way without any speical casing.
> qc = ata_qc_from_tag(ap, ap->active_tag);
> - if (!qc) {
> + if (qc)
> + ata_qc_timeout(qc);
> + else {
> printk(KERN_ERR "ata%u: BUG: timeout without command\n",
> ap->id);
> goto out;
> }
>
> - ata_qc_timeout(qc);
>
> out:
> DPRINTK("EXIT\n");
I think you were trying to kill goto out and the label by moving
ata_qc_timeout() into if condition, no? Currently, the goto out seem
a bit funny.
> diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
> --- a/drivers/scsi/libata-scsi.c
> +++ b/drivers/scsi/libata-scsi.c
> @@ -225,7 +225,7 @@ void ata_to_sense_error(struct ata_queue
> };
> int i = 0;
>
> - cmd->result = SAM_STAT_CHECK_CONDITION;
> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>
> /*
> * Is this an error we can process/parse
> @@ -1468,7 +1468,7 @@ unsigned int ata_scsiop_report_luns(stru
> void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq)
> {
> DPRINTK("ENTER\n");
> - cmd->result = SAM_STAT_CHECK_CONDITION;
> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>
> cmd->sense_buffer[0] = 0x70;
> cmd->sense_buffer[2] = ILLEGAL_REQUEST;
> @@ -1479,28 +1479,81 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
> done(cmd);
> }
>
> +static int atapi_qc_complete_sense(struct ata_queued_cmd *qc, u8 drv_stat)
> +{
> + struct scsi_cmnd *cmd = qc->scsicmd;
> +
> + if (ata_ok(drv_stat))
> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
> + else
> + ata_to_sense_error(qc, drv_stat);
> +
> + qc->ap->flags &= ~ATA_FLAG_IN_EH;
> + qc->scsidone(cmd);
> + return 0;
> +}
> +
> +static void atapi_request_sense(struct ata_queued_cmd *qc)
> +{
> + struct ata_port *ap = qc->ap;
> + struct scsi_cmnd *cmd = qc->scsicmd;
> + int rc;
> +
> + DPRINTK("ATAPI request sense\n");
> +
> + /* FIXME: is this needed? */
> + memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
This is just a side note as this part seems to be just copied from
the original request_sense function. Anyways, I think this isn't
needed anymore. SCSI ML seems to clear things pretty thoroughly these
days.
> +
> + ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
> + qc->dma_dir = DMA_FROM_DEVICE;
> +
> + memset(&qc->cdb, 0, ap->cdb_len);
> + qc->cdb[0] = REQUEST_SENSE;
> + qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
> +
> + qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
> + qc->tf.command = ATA_CMD_PACKET;
> +
> + qc->tf.protocol = ATA_PROT_ATAPI;
> + qc->tf.lbam = (8 * 1024) & 0xff;
> + qc->tf.lbah = (8 * 1024) >> 8;
> + qc->nbytes = SCSI_SENSE_BUFFERSIZE;
> +
> + qc->complete_fn = atapi_qc_complete_sense;
> +
> + rc = ata_qc_issue(qc);
> +
> + if (rc) {
> + /* FIXME: complete failing command with an error */
> + ata_port_disable(ap);
> + }
> +
> + /* FIXME: add timer for timeout of this command */
> + /* control flow continues in atapi_qc_complete_sense() */
> +
> + DPRINTK("EXIT\n");
> +}
> +
> static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
> {
> + struct ata_port *ap = qc->ap;
> struct scsi_cmnd *cmd = qc->scsicmd;
>
> + rmb();
I'm having difficult time understanding this rmb(). What is it
protecting against?
> + if (ap->host->eh_active)
> + return 1;
> +
> if (unlikely(drv_stat & (ATA_BUSY | ATA_DRQ)))
> ata_to_sense_error(qc, drv_stat);
> +
> else if (unlikely(drv_stat & ATA_ERR)) {
> DPRINTK("request check condition\n");
> -
> - /* FIXME: command completion with check condition
> - * but no sense causes the error handler to run,
> - * which then issues REQUEST SENSE, fills in the sense
> - * buffer, and completes the command (for the second
> - * time). We need to issue REQUEST SENSE some other
> - * way, to avoid completing the command twice.
> - */
> - cmd->result = SAM_STAT_CHECK_CONDITION;
> -
> - qc->scsidone(cmd);
> -
> + ap->flags |= ATA_FLAG_IN_EH;
> + atapi_request_sense(qc);
> return 1;
> - } else {
> + }
> +
> + else {
> u8 *scsicmd = cmd->cmnd;
>
> if (scsicmd[0] == INQUIRY) {
> @@ -1535,6 +1588,7 @@ static int atapi_qc_complete(struct ata_
>
> return 0;
> }
> +
> /**
> * atapi_xlat - Initialize PACKET taskfile
> * @qc: command structure to be initialized
> diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -118,6 +118,7 @@ enum {
> ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */
> ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once
> * proper HSM is in place. */
> + ATA_FLAG_IN_EH = (1 << 10),
>
> ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
> ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
Thanks. :-)
--
tejun
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ATAPI error handling work
2005-10-06 5:35 ` Tejun Heo
@ 2005-10-06 10:09 ` Jeff Garzik
2005-10-06 14:56 ` Tejun Heo
0 siblings, 1 reply; 5+ messages in thread
From: Jeff Garzik @ 2005-10-06 10:09 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide@vger.kernel.org, Douglas Gilbert
[-- Attachment #1: Type: text/plain, Size: 6449 bytes --]
Tejun Heo wrote:
> Hello, Jeff.
>
> On Wed, Oct 05, 2005 at 04:51:19PM -0400, Jeff Garzik wrote:
>
>>This is a work-in-progress, attempting to fix the death of libata
>>whenever there is a SCSI error thrown to us... IOW this fixes libata
>>EH to properly fake autosensing, rather than kicking over to the SCSI EH
>>thread for that purpose.
>>
>
>
> Great. I just have a few questions. Oh.. BTW, against which head is
> the patch generated? I couldn't get it applied cleanly to either
> upstream or ALL.
I applied fragments of the patch to 'upstream' branch, so you probably
caught me after I had done that, which invalidated the patch. I've
attached the latest patch.
>>@@ -3174,17 +3106,35 @@ out:
>> void ata_eng_timeout(struct ata_port *ap)
>> {
>> struct ata_queued_cmd *qc;
>>+ struct ata_host_set *host_set = ap->host_set;
>>+ unsigned long flags;
>>+ int printed_message = 0;
>>
>> DPRINTK("ENTER\n");
>>
>>+ spin_lock_irqsave(&host_set->lock, flags);
>>+ while (ap->flags & ATA_FLAG_IN_EH) {
>>+ spin_unlock_irqrestore(&host_set->lock, flags);
>>+
>>+ if (!printed_message++)
>>+ printk(KERN_INFO "ata%u: waiting in timeout handler for EH\n",
>>+ ap->id);
>>+
>>+ msleep(250);
>>+
>>+ spin_lock_irqsave(&host_set->lock, flags);
>>+ }
>>+ spin_unlock_irqrestore(&host_set->lock, flags);
>>+
>
>
>
> I'm not really sure if synchronization around reqsense with
> ATA_FLAG_IN_EH is such a good idea for the following reasons.
>
> * IMHO, tugging in request sense into the timeout window of the
> original command is okay (or even better).
Not really: we have to assume that time passed, so by relying on the
SCSI EH timeout rather than a timer, the timeout is suddenly effectively
random. I want a definitive time length, since its likely REQUEST SENSE
may need a longer timeout for some devices.
Second, the timeout for READ/WRITE commands is often smaller than it is
for non-READ/WRITE commands. We want much to ensure that we have
sufficient time to handle REQUEST SENSE.
> * As long as we grab host_set lock while submitting request sense,
> which we already does, the synchronization can be done safely in EH
> proper. libata EH isn't currently ready for this but other commands
> are dealt the same way and fixing EH is the correct way, IMHO.
We grab the lock submitting request sense, but then we release it.
ata_eng_timeout() could get called before the entire REQUEST SENSE
process is completed.
> Note that we really don't have to do anything special for timing out
> during request sense. Once we fix EH/interrupt handler race, it can
> be handled the same way without any speical casing.
Yes, this is feasible. We can simply wait for the timer (which doesn't
exist yet) or atapi_qc_complete_sense() to clear the ATA_FLAG_IN_EH bit,
at which point the EH timer takes over, or will do so shortly.
>> qc = ata_qc_from_tag(ap, ap->active_tag);
>>- if (!qc) {
>>+ if (qc)
>>+ ata_qc_timeout(qc);
>>+ else {
>> printk(KERN_ERR "ata%u: BUG: timeout without command\n",
>> ap->id);
>> goto out;
>> }
>>
>>- ata_qc_timeout(qc);
>>
>> out:
>> DPRINTK("EXIT\n");
>
>
>
> I think you were trying to kill goto out and the label by moving
> ata_qc_timeout() into if condition, no? Currently, the goto out seem
> a bit funny.
The goto is gone in the most recent version :)
>>diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
>>--- a/drivers/scsi/libata-scsi.c
>>+++ b/drivers/scsi/libata-scsi.c
>>@@ -225,7 +225,7 @@ void ata_to_sense_error(struct ata_queue
>> };
>> int i = 0;
>>
>>- cmd->result = SAM_STAT_CHECK_CONDITION;
>>+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>
>> /*
>> * Is this an error we can process/parse
>>@@ -1468,7 +1468,7 @@ unsigned int ata_scsiop_report_luns(stru
>> void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq)
>> {
>> DPRINTK("ENTER\n");
>>- cmd->result = SAM_STAT_CHECK_CONDITION;
>>+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>
>> cmd->sense_buffer[0] = 0x70;
>> cmd->sense_buffer[2] = ILLEGAL_REQUEST;
>>@@ -1479,28 +1479,81 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
>> done(cmd);
>> }
>>
>>+static int atapi_qc_complete_sense(struct ata_queued_cmd *qc, u8 drv_stat)
>>+{
>>+ struct scsi_cmnd *cmd = qc->scsicmd;
>>+
>>+ if (ata_ok(drv_stat))
>>+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>+ else
>>+ ata_to_sense_error(qc, drv_stat);
>>+
>>+ qc->ap->flags &= ~ATA_FLAG_IN_EH;
>>+ qc->scsidone(cmd);
>>+ return 0;
>>+}
>>+
>>+static void atapi_request_sense(struct ata_queued_cmd *qc)
>>+{
>>+ struct ata_port *ap = qc->ap;
>>+ struct scsi_cmnd *cmd = qc->scsicmd;
>>+ int rc;
>>+
>>+ DPRINTK("ATAPI request sense\n");
>>+
>>+ /* FIXME: is this needed? */
>>+ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
>
>
>
> This is just a side note as this part seems to be just copied from
> the original request_sense function. Anyways, I think this isn't
> needed anymore. SCSI ML seems to clear things pretty thoroughly these
> days.
Well, until I have the line of code pointed to me, its not important.
Its not the hot path, so we can afford to do possibly-redundant work.
>>+ ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
>>+ qc->dma_dir = DMA_FROM_DEVICE;
>>+
>>+ memset(&qc->cdb, 0, ap->cdb_len);
>>+ qc->cdb[0] = REQUEST_SENSE;
>>+ qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
>>+
>>+ qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
>>+ qc->tf.command = ATA_CMD_PACKET;
>>+
>>+ qc->tf.protocol = ATA_PROT_ATAPI;
>>+ qc->tf.lbam = (8 * 1024) & 0xff;
>>+ qc->tf.lbah = (8 * 1024) >> 8;
>>+ qc->nbytes = SCSI_SENSE_BUFFERSIZE;
>>+
>>+ qc->complete_fn = atapi_qc_complete_sense;
>>+
>>+ rc = ata_qc_issue(qc);
>>+
>>+ if (rc) {
>>+ /* FIXME: complete failing command with an error */
>>+ ata_port_disable(ap);
>>+ }
>>+
>>+ /* FIXME: add timer for timeout of this command */
>>+ /* control flow continues in atapi_qc_complete_sense() */
>>+
>>+ DPRINTK("EXIT\n");
>>+}
>>+
>> static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
>> {
>>+ struct ata_port *ap = qc->ap;
>> struct scsi_cmnd *cmd = qc->scsicmd;
>>
>>+ rmb();
>
>
>
> I'm having difficult time understanding this rmb(). What is it
> protecting against?
Just paranoia, before reading ap->host->eh_active, which is tweaked by
scsi_error.c.
Jeff
[-- Attachment #2: patch.atapi-autosense --]
[-- Type: text/plain, Size: 6775 bytes --]
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -71,6 +71,7 @@ static int fgb(u32 bitmap);
static int ata_choose_xfer_mode(struct ata_port *ap,
u8 *xfer_mode_out,
unsigned int *xfer_shift_out);
+static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat);
static void __ata_qc_complete(struct ata_queued_cmd *qc);
static unsigned int ata_unique_id = 1;
@@ -3037,32 +3038,11 @@ static void ata_qc_timeout(struct ata_qu
{
struct ata_port *ap = qc->ap;
struct ata_host_set *host_set = ap->host_set;
- struct ata_device *dev = qc->dev;
u8 host_stat = 0, drv_stat;
unsigned long flags;
DPRINTK("ENTER\n");
- /* FIXME: doesn't this conflict with timeout handling? */
- if (qc->dev->class == ATA_DEV_ATAPI && qc->scsicmd) {
- struct scsi_cmnd *cmd = qc->scsicmd;
-
- if (!(cmd->eh_eflags & SCSI_EH_CANCEL_CMD)) {
-
- /* finish completing original command */
- spin_lock_irqsave(&host_set->lock, flags);
- __ata_qc_complete(qc);
- spin_unlock_irqrestore(&host_set->lock, flags);
-
- atapi_request_sense(ap, dev, cmd);
-
- cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
- scsi_finish_command(cmd);
-
- goto out;
- }
- }
-
spin_lock_irqsave(&host_set->lock, flags);
/* hack alert! We cannot use the supplied completion
@@ -3101,7 +3081,6 @@ static void ata_qc_timeout(struct ata_qu
spin_unlock_irqrestore(&host_set->lock, flags);
-out:
DPRINTK("EXIT\n");
}
@@ -3127,19 +3106,33 @@ out:
void ata_eng_timeout(struct ata_port *ap)
{
struct ata_queued_cmd *qc;
+ struct ata_host_set *host_set = ap->host_set;
+ unsigned long flags;
+ int printed_message = 0;
DPRINTK("ENTER\n");
+ spin_lock_irqsave(&host_set->lock, flags);
+ while (ap->flags & ATA_FLAG_IN_EH) {
+ spin_unlock_irqrestore(&host_set->lock, flags);
+
+ if (!printed_message++)
+ printk(KERN_INFO "ata%u: waiting in timeout handler for EH\n",
+ ap->id);
+
+ msleep(250);
+
+ spin_lock_irqsave(&host_set->lock, flags);
+ }
+ spin_unlock_irqrestore(&host_set->lock, flags);
+
qc = ata_qc_from_tag(ap, ap->active_tag);
if (qc)
ata_qc_timeout(qc);
- else {
+ else
printk(KERN_ERR "ata%u: BUG: timeout without command\n",
ap->id);
- goto out;
- }
-out:
DPRINTK("EXIT\n");
}
@@ -3207,7 +3200,7 @@ struct ata_queued_cmd *ata_qc_new_init(s
return qc;
}
-int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat)
+static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat)
{
return 0;
}
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1479,19 +1479,33 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
done(cmd);
}
-void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
- struct scsi_cmnd *cmd)
+static int atapi_qc_complete_sense(struct ata_queued_cmd *qc, u8 drv_stat)
{
- DECLARE_COMPLETION(wait);
- struct ata_queued_cmd *qc;
- unsigned long flags;
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ int ok;
+
+ ok = ata_ok(drv_stat);
+ if (ok)
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+ else
+ ata_to_sense_error(qc, drv_stat);
+
+ DPRINTK("ATAPI request sense completion (sense %sretrieved)\n",
+ ok ? "" : "not ");
+
+ qc->ap->flags &= ~ATA_FLAG_IN_EH;
+ qc->scsidone(cmd);
+ return 0;
+}
+
+static void atapi_request_sense(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ struct scsi_cmnd *cmd = qc->scsicmd;
int rc;
DPRINTK("ATAPI request sense\n");
- qc = ata_qc_new_init(ap, dev);
- BUG_ON(qc == NULL);
-
/* FIXME: is this needed? */
memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
@@ -1510,44 +1524,39 @@ void atapi_request_sense(struct ata_port
qc->tf.lbah = (8 * 1024) >> 8;
qc->nbytes = SCSI_SENSE_BUFFERSIZE;
- qc->waiting = &wait;
- qc->complete_fn = ata_qc_complete_noop;
+ qc->complete_fn = atapi_qc_complete_sense;
- spin_lock_irqsave(&ap->host_set->lock, flags);
rc = ata_qc_issue(qc);
- spin_unlock_irqrestore(&ap->host_set->lock, flags);
- if (rc)
+ if (rc) {
+ /* FIXME: complete failing command with an error */
ata_port_disable(ap);
- else
- wait_for_completion(&wait);
+ }
+
+ /* FIXME: add timer for timeout of this command */
+ /* control flow continues in atapi_qc_complete_sense() */
DPRINTK("EXIT\n");
}
static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
{
+ struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
VPRINTK("ENTER, drv_stat == 0x%x\n", drv_stat);
+ rmb();
+ if (ap->host->eh_active)
+ return 1;
+
if (unlikely(drv_stat & (ATA_BUSY | ATA_DRQ)))
ata_to_sense_error(qc, drv_stat);
else if (unlikely(drv_stat & ATA_ERR)) {
DPRINTK("request check condition\n");
-
- /* FIXME: command completion with check condition
- * but no sense causes the error handler to run,
- * which then issues REQUEST SENSE, fills in the sense
- * buffer, and completes the command (for the second
- * time). We need to issue REQUEST SENSE some other
- * way, to avoid completing the command twice.
- */
- cmd->result = SAM_STAT_CHECK_CONDITION;
-
- qc->scsidone(cmd);
-
+ ap->flags |= ATA_FLAG_IN_EH;
+ atapi_request_sense(qc);
return 1;
}
@@ -1585,6 +1594,7 @@ static int atapi_qc_complete(struct ata_
qc->scsidone(cmd);
return 0;
}
+
/**
* atapi_xlat - Initialize PACKET taskfile
* @qc: command structure to be initialized
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -39,7 +39,6 @@ struct ata_scsi_args {
/* libata-core.c */
extern int atapi_enabled;
-extern int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat);
extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
struct ata_device *dev);
extern void ata_qc_free(struct ata_queued_cmd *qc);
@@ -52,8 +51,6 @@ extern void swap_buf_le16(u16 *buf, unsi
/* libata-scsi.c */
-extern void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
- struct scsi_cmnd *cmd);
extern void ata_scsi_scan_host(struct ata_port *ap);
extern void ata_to_sense_error(struct ata_queued_cmd *qc, u8 drv_stat);
extern int ata_scsi_error(struct Scsi_Host *host);
diff --git a/include/linux/libata.h b/include/linux/libata.h
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -118,6 +118,7 @@ enum {
ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */
ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once
* proper HSM is in place. */
+ ATA_FLAG_IN_EH = (1 << 10),
ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ATAPI error handling work
2005-10-06 10:09 ` Jeff Garzik
@ 2005-10-06 14:56 ` Tejun Heo
2005-10-29 18:33 ` Jeff Garzik
0 siblings, 1 reply; 5+ messages in thread
From: Tejun Heo @ 2005-10-06 14:56 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide@vger.kernel.org, Douglas Gilbert
Hello, Jeff.
Jeff Garzik wrote:
> Tejun Heo wrote:
[-snip-]
>>
>> I'm not really sure if synchronization around reqsense with
>> ATA_FLAG_IN_EH is such a good idea for the following reasons.
>>
>> * IMHO, tugging in request sense into the timeout window of the
>> original command is okay (or even better).
>
>
> Not really: we have to assume that time passed, so by relying on the
> SCSI EH timeout rather than a timer, the timeout is suddenly effectively
> random. I want a definitive time length, since its likely REQUEST SENSE
> may need a longer timeout for some devices.
>
> Second, the timeout for READ/WRITE commands is often smaller than it is
> for non-READ/WRITE commands. We want much to ensure that we have
> sufficient time to handle REQUEST SENSE.
>
Ah.. I see. I was assuming
- If a device fails a command (not timing out), it probably does it in
reasonable amount of time.
- REQUEST_SENSE wouldn't take very long.
I guess my assumptions were too hopeful. :-(
>
>> * As long as we grab host_set lock while submitting request sense,
>> which we already does, the synchronization can be done safely in EH
>> proper. libata EH isn't currently ready for this but other commands
>> are dealt the same way and fixing EH is the correct way, IMHO.
>
>
> We grab the lock submitting request sense, but then we release it.
> ata_eng_timeout() could get called before the entire REQUEST SENSE
> process is completed.
>
>
>> Note that we really don't have to do anything special for timing out
>> during request sense. Once we fix EH/interrupt handler race, it can
>> be handled the same way without any speical casing.
>
>
> Yes, this is feasible. We can simply wait for the timer (which doesn't
> exist yet) or atapi_qc_complete_sense() to clear the ATA_FLAG_IN_EH bit,
> at which point the EH timer takes over, or will do so shortly.
>
As I wrote in the libata EH doc and associated thread, I don't think
doing autosensing-by-turning-qc-around is a very good idea. I hope I
can make some point here.
In the other thread, you wrote that the current ATAPI sense
requisition is corrupt because it finishes scmd twice and I believe this
patch was implemented to fix that. I disagreed with you in that thread
and gave my rationale for that. If I was wrong in that thread, please
ignore what follows.
Reasons why I think doing auto-sensing isn't such a good idea.
* I think that not having qc-turn-around is cleaner and easier to
maintain.
* We need EH anyway and sense requesting can be easily done with EH
mechanisms. Command handoff to EH is definitely necessary for all EH
cases, so ATAPI command handoff can be done just the same. EH
needs to issue commands and also need timeouts. We can use the same
mechanism for REQUEST SENSE. No special timer or flag is needed. I
even have submitted a patch to add timeouts to all EH commands. IIRC,
I've also converted REQUEST_SENSE to have time out.
* Autosensing does have performance advantage. But it's not a hot path.
I doubt the performance advantage is anything worth of the added
complexity.
>
>>> qc = ata_qc_from_tag(ap, ap->active_tag);
>>> - if (!qc) {
>>> + if (qc)
>>> + ata_qc_timeout(qc);
>>> + else {
>>> printk(KERN_ERR "ata%u: BUG: timeout without command\n",
>>> ap->id);
>>> goto out;
>>> }
>>>
>>> - ata_qc_timeout(qc);
>>>
>>> out:
>>> DPRINTK("EXIT\n");
>>
>>
>>
>>
>> I think you were trying to kill goto out and the label by moving
>> ata_qc_timeout() into if condition, no? Currently, the goto out seem
>> a bit funny.
>
>
> The goto is gone in the most recent version :)
>
>
>>> diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
>>> --- a/drivers/scsi/libata-scsi.c
>>> +++ b/drivers/scsi/libata-scsi.c
>>> @@ -225,7 +225,7 @@ void ata_to_sense_error(struct ata_queue
>>> };
>>> int i = 0;
>>>
>>> - cmd->result = SAM_STAT_CHECK_CONDITION;
>>> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>>
>>> /*
>>> * Is this an error we can process/parse
>>> @@ -1468,7 +1468,7 @@ unsigned int ata_scsiop_report_luns(stru
>>> void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct
>>> scsi_cmnd *), u8 asc, u8 ascq)
>>> {
>>> DPRINTK("ENTER\n");
>>> - cmd->result = SAM_STAT_CHECK_CONDITION;
>>> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>>
>>> cmd->sense_buffer[0] = 0x70;
>>> cmd->sense_buffer[2] = ILLEGAL_REQUEST;
>>> @@ -1479,28 +1479,81 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
>>> done(cmd);
>>> }
>>>
>>> +static int atapi_qc_complete_sense(struct ata_queued_cmd *qc, u8
>>> drv_stat)
>>> +{
>>> + struct scsi_cmnd *cmd = qc->scsicmd;
>>> +
>>> + if (ata_ok(drv_stat))
>>> + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>>> + else
>>> + ata_to_sense_error(qc, drv_stat);
>>> +
>>> + qc->ap->flags &= ~ATA_FLAG_IN_EH;
>>> + qc->scsidone(cmd);
>>> + return 0;
>>> +}
>>> +
>>> +static void atapi_request_sense(struct ata_queued_cmd *qc)
>>> +{
>>> + struct ata_port *ap = qc->ap;
>>> + struct scsi_cmnd *cmd = qc->scsicmd;
>>> + int rc;
>>> +
>>> + DPRINTK("ATAPI request sense\n");
>>> +
>>> + /* FIXME: is this needed? */
>>> + memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
>>
>>
>>
>>
>> This is just a side note as this part seems to be just copied from
>> the original request_sense function. Anyways, I think this isn't
>> needed anymore. SCSI ML seems to clear things pretty thoroughly these
>> days.
>
>
> Well, until I have the line of code pointed to me, its not important.
> Its not the hot path, so we can afford to do possibly-redundant work.
>
It's in scsi_get_command and scsi_setup_cmd_retry. But I agree with
you that leaving it alone would be a better idea. Maybe chaning comment
will help clear things a bit? something like /* paranoia */?
>
>>> + ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
>>> + qc->dma_dir = DMA_FROM_DEVICE;
>>> +
>>> + memset(&qc->cdb, 0, ap->cdb_len);
>>> + qc->cdb[0] = REQUEST_SENSE;
>>> + qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
>>> +
>>> + qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
>>> + qc->tf.command = ATA_CMD_PACKET;
>>> +
>>> + qc->tf.protocol = ATA_PROT_ATAPI;
>>> + qc->tf.lbam = (8 * 1024) & 0xff;
>>> + qc->tf.lbah = (8 * 1024) >> 8;
>>> + qc->nbytes = SCSI_SENSE_BUFFERSIZE;
>>> +
>>> + qc->complete_fn = atapi_qc_complete_sense;
>>> +
>>> + rc = ata_qc_issue(qc);
>>> +
>>> + if (rc) {
>>> + /* FIXME: complete failing command with an error */
>>> + ata_port_disable(ap);
>>> + }
>>> +
>>> + /* FIXME: add timer for timeout of this command */
>>> + /* control flow continues in atapi_qc_complete_sense() */
>>> +
>>> + DPRINTK("EXIT\n");
>>> +}
>>> +
>>> static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
>>> {
>>> + struct ata_port *ap = qc->ap;
>>> struct scsi_cmnd *cmd = qc->scsicmd;
>>>
>>> + rmb();
>>
>>
>>
>>
>> I'm having difficult time understanding this rmb(). What is it
>> protecting against?
>
>
> Just paranoia, before reading ap->host->eh_active, which is tweaked by
> scsi_error.c.
Maybe it's just me, but codes with [r|w]mb's without clear context or
comment are among the most difficult codes to understand. And I
personally think that mb's should be used only when strictly necessary
because otherwise they are exteremely confusing later.
Maybe putting some comments there can help other guys like me spend
less time wondering about it?
Thanks. :-)
--
tejun
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ATAPI error handling work
2005-10-06 14:56 ` Tejun Heo
@ 2005-10-29 18:33 ` Jeff Garzik
0 siblings, 0 replies; 5+ messages in thread
From: Jeff Garzik @ 2005-10-29 18:33 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide@vger.kernel.org, Douglas Gilbert
Tejun Heo wrote:
> As I wrote in the libata EH doc and associated thread, I don't think
> doing autosensing-by-turning-qc-around is a very good idea. I hope I
> can make some point here.
I like autosensing, particularly because we want to get away from using
a lot of SCSI EH infrastructure, since libata will eventually be
separated from SCSI.
> In the other thread, you wrote that the current ATAPI sense requisition
> is corrupt because it finishes scmd twice and I believe this patch was
> implemented to fix that. I disagreed with you in that thread and gave
> my rationale for that. If I was wrong in that thread, please ignore
> what follows.
I correct myself: the corruption occurs because we obtain the scsi host
lock (our host_set lock), and then call scsi_finish_command(). This is
in error, because scsi_finish_command() also acquires the scsi host lock
via a call to scsi_device_unbusy().
Multiple possible solutions: separate scsi host lock from our own.
batching commands until the end of EH, as SCSI EH does. continue work
on auto-sensing. ...
Jeff
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2005-10-29 18:33 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-05 20:51 [PATCH] ATAPI error handling work Jeff Garzik
2005-10-06 5:35 ` Tejun Heo
2005-10-06 10:09 ` Jeff Garzik
2005-10-06 14:56 ` Tejun Heo
2005-10-29 18:33 ` Jeff Garzik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).