* [PATCH 02/13] libata: make the owner of a qc responsible for freeing it
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 04/13] ahci: fix err_mask setting in ahci_host_intr Tejun Heo
` (11 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
qc used to be freed automatically on command completion. However, as
a qc can carry information about its completion status, it can be
useful to its owner/issuer after command completion. This patch makes
freeing qc responsibility of its owner. This simplifies
ata_exec_internal() and makes command turn-around for atapi request
sensing less hackish.
This change was originally suggested by Jeff Garzik.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-core.c | 47 ++++++++++++--------------------------------
drivers/scsi/libata-scsi.c | 14 +++++++------
include/linux/libata.h | 2 +-
3 files changed, 21 insertions(+), 42 deletions(-)
643c29b12df5109d4c908a436c2ce7d3e1e212b2
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 0e49323..15df633 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1072,24 +1072,12 @@ static unsigned int ata_pio_modes(const
timing API will get this right anyway */
}
-struct ata_exec_internal_arg {
- unsigned int err_mask;
- struct ata_taskfile *tf;
- struct completion *waiting;
-};
-
-int ata_qc_complete_internal(struct ata_queued_cmd *qc)
+void ata_qc_complete_internal(struct ata_queued_cmd *qc)
{
- struct ata_exec_internal_arg *arg = qc->private_data;
- struct completion *waiting = arg->waiting;
+ struct completion *waiting = qc->private_data;
- if (!(qc->err_mask & ~AC_ERR_DEV))
- qc->ap->ops->tf_read(qc->ap, arg->tf);
- arg->err_mask = qc->err_mask;
- arg->waiting = NULL;
+ qc->ap->ops->tf_read(qc->ap, &qc->tf);
complete(waiting);
-
- return 0;
}
/**
@@ -1120,7 +1108,7 @@ ata_exec_internal(struct ata_port *ap, s
struct ata_queued_cmd *qc;
DECLARE_COMPLETION(wait);
unsigned long flags;
- struct ata_exec_internal_arg arg;
+ unsigned int err_mask;
spin_lock_irqsave(&ap->host_set->lock, flags);
@@ -1134,9 +1122,7 @@ ata_exec_internal(struct ata_port *ap, s
qc->nsect = buflen / ATA_SECT_SIZE;
}
- arg.waiting = &wait;
- arg.tf = tf;
- qc->private_data = &arg;
+ qc->private_data = &wait;
qc->complete_fn = ata_qc_complete_internal;
if (ata_qc_issue(qc))
@@ -1153,7 +1139,7 @@ ata_exec_internal(struct ata_port *ap, s
* before the caller cleans up, it will result in a
* spurious interrupt. We can live with that.
*/
- if (arg.waiting) {
+ if (qc->flags & ATA_QCFLAG_ACTIVE) {
qc->err_mask = AC_ERR_OTHER;
ata_qc_complete(qc);
printk(KERN_WARNING "ata%u: qc timeout (cmd 0x%x)\n",
@@ -1163,7 +1149,12 @@ ata_exec_internal(struct ata_port *ap, s
spin_unlock_irqrestore(&ap->host_set->lock, flags);
}
- return arg.err_mask;
+ *tf = qc->tf;
+ err_mask = qc->err_mask;
+
+ ata_qc_free(qc);
+
+ return err_mask;
issue_fail:
ata_qc_free(qc);
@@ -3633,8 +3624,6 @@ void ata_qc_free(struct ata_queued_cmd *
void ata_qc_complete(struct ata_queued_cmd *qc)
{
- int rc;
-
assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
assert(qc->flags & ATA_QCFLAG_ACTIVE);
@@ -3648,17 +3637,7 @@ void ata_qc_complete(struct ata_queued_c
qc->flags &= ~ATA_QCFLAG_ACTIVE;
/* call completion callback */
- rc = qc->complete_fn(qc);
-
- /* if callback indicates not to complete command (non-zero),
- * return immediately
- */
- if (rc != 0)
- return;
-
- ata_qc_free(qc);
-
- VPRINTK("EXIT\n");
+ qc->complete_fn(qc);
}
static inline int ata_should_dma_map(struct ata_queued_cmd *qc)
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 0e65bfe..ce3fe92 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1219,7 +1219,7 @@ nothing_to_do:
return 1;
}
-static int ata_scsi_qc_complete(struct ata_queued_cmd *qc)
+static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
@@ -1256,7 +1256,7 @@ static int ata_scsi_qc_complete(struct a
qc->scsidone(cmd);
- return 0;
+ ata_qc_free(qc);
}
/**
@@ -1982,7 +1982,7 @@ void ata_scsi_badcmd(struct scsi_cmnd *c
done(cmd);
}
-static int atapi_sense_complete(struct ata_queued_cmd *qc)
+static void atapi_sense_complete(struct ata_queued_cmd *qc)
{
if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0))
/* FIXME: not quite right; we don't want the
@@ -1993,7 +1993,7 @@ static int atapi_sense_complete(struct a
ata_gen_ata_desc_sense(qc);
qc->scsidone(qc->scsicmd);
- return 0;
+ ata_qc_free(qc);
}
/* is it pointless to prefer PIO for "safety reasons"? */
@@ -2050,7 +2050,7 @@ static void atapi_request_sense(struct a
DPRINTK("EXIT\n");
}
-static int atapi_qc_complete(struct ata_queued_cmd *qc)
+static void atapi_qc_complete(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *cmd = qc->scsicmd;
unsigned int err_mask = qc->err_mask;
@@ -2060,7 +2060,7 @@ static int atapi_qc_complete(struct ata_
if (unlikely(err_mask & AC_ERR_DEV)) {
cmd->result = SAM_STAT_CHECK_CONDITION;
atapi_request_sense(qc);
- return 1;
+ return;
}
else if (unlikely(err_mask))
@@ -2100,7 +2100,7 @@ static int atapi_qc_complete(struct ata_
}
qc->scsidone(cmd);
- return 0;
+ ata_qc_free(qc);
}
/**
* atapi_xlat - Initialize PACKET taskfile
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 46ccea2..d58b659 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -235,7 +235,7 @@ struct ata_port;
struct ata_queued_cmd;
/* typedefs */
-typedef int (*ata_qc_cb_t) (struct ata_queued_cmd *qc);
+typedef void (*ata_qc_cb_t) (struct ata_queued_cmd *qc);
struct ata_ioports {
unsigned long cmd_addr;
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 04/13] ahci: fix err_mask setting in ahci_host_intr
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
2006-01-23 4:09 ` [PATCH 02/13] libata: make the owner of a qc responsible for freeing it Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-27 3:36 ` Jeff Garzik
2006-01-23 4:09 ` [PATCH 03/13] libata: fix ata_qc_issue() error handling Tejun Heo
` (10 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
In ahci_host_intr err_mask is determined from IRQ status but never
used. This patch sets qc->err_mask to the determined err_mask.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/ahci.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
72a3f6348bd748686e9fdc396dbf05a877da3d91
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index 30676b0..a168b52 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -721,7 +721,7 @@ static inline int ahci_host_intr(struct
ahci_restart_port(ap, status);
if (qc) {
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask |= err_mask;
ata_qc_complete(qc);
}
}
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 03/13] libata: fix ata_qc_issue() error handling
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
2006-01-23 4:09 ` [PATCH 02/13] libata: make the owner of a qc responsible for freeing it Tejun Heo
2006-01-23 4:09 ` [PATCH 04/13] ahci: fix err_mask setting in ahci_host_intr Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 05/13] libata: add detailed AC_ERR_* flags Tejun Heo
` (9 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
When ata_qc_issue() fails, the qc might have been dma mapped or not.
So, performing only ata_qc_free() results in dma map leak. This patch
makes ata_qc_issue() mark dma map flags correctly on failure and calls
ata_qc_complete() after ata_qc_issue() fails.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-core.c | 18 ++++++++----------
drivers/scsi/libata-scsi.c | 6 ++++--
2 files changed, 12 insertions(+), 12 deletions(-)
cea7412270f60847981a01eebbb12b4a940ec5fb
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 15df633..43a2328 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1125,8 +1125,10 @@ ata_exec_internal(struct ata_port *ap, s
qc->private_data = &wait;
qc->complete_fn = ata_qc_complete_internal;
- if (ata_qc_issue(qc))
- goto issue_fail;
+ if (ata_qc_issue(qc)) {
+ qc->err_mask = AC_ERR_OTHER;
+ ata_qc_complete(qc);
+ }
spin_unlock_irqrestore(&ap->host_set->lock, flags);
@@ -1155,11 +1157,6 @@ ata_exec_internal(struct ata_port *ap, s
ata_qc_free(qc);
return err_mask;
-
- issue_fail:
- ata_qc_free(qc);
- spin_unlock_irqrestore(&ap->host_set->lock, flags);
- return AC_ERR_OTHER;
}
/**
@@ -3687,10 +3684,10 @@ int ata_qc_issue(struct ata_queued_cmd *
if (ata_should_dma_map(qc)) {
if (qc->flags & ATA_QCFLAG_SG) {
if (ata_sg_setup(qc))
- goto err_out;
+ goto sg_err;
} else if (qc->flags & ATA_QCFLAG_SINGLE) {
if (ata_sg_setup_one(qc))
- goto err_out;
+ goto sg_err;
}
} else {
qc->flags &= ~ATA_QCFLAG_DMAMAP;
@@ -3703,7 +3700,8 @@ int ata_qc_issue(struct ata_queued_cmd *
return ap->ops->qc_issue(qc);
-err_out:
+sg_err:
+ qc->flags &= ~ATA_QCFLAG_DMAMAP;
return -1;
}
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index ce3fe92..c496309 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1322,8 +1322,10 @@ static void ata_scsi_translate(struct at
goto early_finish;
/* select device, send command to hardware */
- if (ata_qc_issue(qc))
- goto err_did;
+ if (ata_qc_issue(qc)) {
+ qc->err_mask |= AC_ERR_OTHER;
+ ata_qc_complete(qc);
+ }
VPRINTK("EXIT\n");
return;
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 05/13] libata: add detailed AC_ERR_* flags
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (2 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 03/13] libata: fix ata_qc_issue() error handling Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 06/13] libata: return AC_ERR_* from issue functions Tejun Heo
` (8 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
Add detailed AC_ERR_* flags and use them. Long-term goal is to
describe all errors with err_mask and tf combination (tf for failed
sector information, etc...). After proper error diagnosis is
implemented, sense data should also be generated from err_mask instead
of directly from hardware tf registers as it is currently.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/ahci.c | 2 +-
drivers/scsi/libata-core.c | 12 ++++++------
drivers/scsi/sata_mv.c | 2 +-
drivers/scsi/sata_sil24.c | 2 +-
include/linux/libata.h | 15 ++++++++++-----
5 files changed, 19 insertions(+), 14 deletions(-)
9119a06851d70767d3c97c6fb707751fd8645f49
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index a168b52..bb3686a 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -680,7 +680,7 @@ static void ahci_eng_timeout(struct ata_
* not being called from the SCSI EH.
*/
qc->scsidone = scsi_finish_command;
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask |= AC_ERR_TIMEOUT;
ata_qc_complete(qc);
}
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 43a2328..f5519f0 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1142,7 +1142,7 @@ ata_exec_internal(struct ata_port *ap, s
* spurious interrupt. We can live with that.
*/
if (qc->flags & ATA_QCFLAG_ACTIVE) {
- qc->err_mask = AC_ERR_OTHER;
+ qc->err_mask = AC_ERR_TIMEOUT;
ata_qc_complete(qc);
printk(KERN_WARNING "ata%u: qc timeout (cmd 0x%x)\n",
ap->id, command);
@@ -2917,7 +2917,7 @@ static unsigned long ata_pio_poll(struct
status = ata_chk_status(ap);
if (status & ATA_BUSY) {
if (time_after(jiffies, ap->pio_task_timeout)) {
- qc->err_mask |= AC_ERR_ATA_BUS;
+ qc->err_mask |= AC_ERR_TIMEOUT;
ap->hsm_task_state = HSM_ST_TMOUT;
return 0;
}
@@ -3295,7 +3295,7 @@ static void atapi_pio_bytes(struct ata_q
err_out:
printk(KERN_INFO "ata%u: dev %u: ATAPI check failed\n",
ap->id, dev->devno);
- qc->err_mask |= AC_ERR_ATA_BUS;
+ qc->err_mask |= AC_ERR_HSM;
ap->hsm_task_state = HSM_ST_ERR;
}
@@ -3353,7 +3353,7 @@ static void ata_pio_block(struct ata_por
} else {
/* handle BSY=0, DRQ=0 as error */
if ((status & ATA_DRQ) == 0) {
- qc->err_mask |= AC_ERR_ATA_BUS;
+ qc->err_mask |= AC_ERR_HSM;
ap->hsm_task_state = HSM_ST_ERR;
return;
}
@@ -4159,14 +4159,14 @@ static void atapi_packet_task(void *_dat
/* sleep-wait for BSY to clear */
DPRINTK("busy wait\n");
if (ata_busy_sleep(ap, ATA_TMOUT_CDB_QUICK, ATA_TMOUT_CDB)) {
- qc->err_mask |= AC_ERR_ATA_BUS;
+ qc->err_mask |= AC_ERR_TIMEOUT;
goto err_out;
}
/* make sure DRQ is set */
status = ata_chk_status(ap);
if ((status & (ATA_BUSY | ATA_DRQ)) != ATA_DRQ) {
- qc->err_mask |= AC_ERR_ATA_BUS;
+ qc->err_mask |= AC_ERR_HSM;
goto err_out;
}
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index cd54244..89bcd85 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -1866,7 +1866,7 @@ static void mv_eng_timeout(struct ata_po
*/
spin_lock_irqsave(&ap->host_set->lock, flags);
qc->scsidone = scsi_finish_command;
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask |= AC_ERR_TIMEOUT;
ata_qc_complete(qc);
spin_unlock_irqrestore(&ap->host_set->lock, flags);
}
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index 9231301..fb59012 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -653,7 +653,7 @@ static void sil24_eng_timeout(struct ata
*/
printk(KERN_ERR "ata%u: command timeout\n", ap->id);
qc->scsidone = scsi_finish_command;
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask |= AC_ERR_TIMEOUT;
ata_qc_complete(qc);
sil24_reset_controller(ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d58b659..8ff3a7f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -222,10 +222,15 @@ enum hsm_task_states {
};
enum ata_completion_errors {
- AC_ERR_OTHER = (1 << 0),
- AC_ERR_DEV = (1 << 1),
- AC_ERR_ATA_BUS = (1 << 2),
- AC_ERR_HOST_BUS = (1 << 3),
+ AC_ERR_DEV = (1 << 0), /* device reported error */
+ AC_ERR_HSM = (1 << 1), /* host state machine violation */
+ AC_ERR_TIMEOUT = (1 << 2), /* timeout */
+ AC_ERR_MEDIA = (1 << 3), /* media error */
+ AC_ERR_ATA_BUS = (1 << 4), /* ATA bus error */
+ AC_ERR_HOST_BUS = (1 << 5), /* host bus error */
+ AC_ERR_SYSTEM = (1 << 6), /* system error */
+ AC_ERR_INVALID = (1 << 7), /* invalid argument */
+ AC_ERR_OTHER = (1 << 8), /* unknown */
};
/* forward declarations */
@@ -833,7 +838,7 @@ static inline int ata_try_flush_cache(co
static inline unsigned int ac_err_mask(u8 status)
{
if (status & ATA_BUSY)
- return AC_ERR_ATA_BUS;
+ return AC_ERR_HSM;
if (status & (ATA_ERR | ATA_DF))
return AC_ERR_DEV;
return 0;
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 06/13] libata: return AC_ERR_* from issue functions
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (3 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 05/13] libata: add detailed AC_ERR_* flags Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q() Tejun Heo
` (7 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
Return AC_ERR_* mask from issue fuctions instead of 0/-1. This
enables things like failing a qc with AC_ERR_HSM when the device
doesn't set DRDY when the qc is about to be issued.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/ahci.c | 4 ++--
drivers/scsi/libata-core.c | 17 ++++++++---------
drivers/scsi/libata-scsi.c | 10 ++++------
drivers/scsi/libata.h | 2 +-
drivers/scsi/pdc_adma.c | 4 ++--
drivers/scsi/sata_mv.c | 4 ++--
drivers/scsi/sata_promise.c | 4 ++--
drivers/scsi/sata_qstor.c | 4 ++--
drivers/scsi/sata_sil24.c | 4 ++--
drivers/scsi/sata_sx4.c | 4 ++--
include/linux/libata.h | 4 ++--
11 files changed, 29 insertions(+), 32 deletions(-)
0bf21774b17d3531aface40e56b34e2732dc35d5
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index bb3686a..0f6e4dd 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -184,7 +184,7 @@ struct ahci_port_priv {
static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
static void ahci_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
-static int ahci_qc_issue(struct ata_queued_cmd *qc);
+static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc);
static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
static void ahci_phy_reset(struct ata_port *ap);
static void ahci_irq_clear(struct ata_port *ap);
@@ -800,7 +800,7 @@ static irqreturn_t ahci_interrupt (int i
return IRQ_RETVAL(handled);
}
-static int ahci_qc_issue(struct ata_queued_cmd *qc)
+static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
void __iomem *port_mmio = (void __iomem *) ap->ioaddr.cmd_addr;
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index f5519f0..b29bf0d 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1125,10 +1125,9 @@ ata_exec_internal(struct ata_port *ap, s
qc->private_data = &wait;
qc->complete_fn = ata_qc_complete_internal;
- if (ata_qc_issue(qc)) {
- qc->err_mask = AC_ERR_OTHER;
+ qc->err_mask = ata_qc_issue(qc);
+ if (qc->err_mask)
ata_qc_complete(qc);
- }
spin_unlock_irqrestore(&ap->host_set->lock, flags);
@@ -3674,10 +3673,10 @@ static inline int ata_should_dma_map(str
* spin_lock_irqsave(host_set lock)
*
* RETURNS:
- * Zero on success, negative on error.
+ * Zero on success, AC_ERR_* mask on failure
*/
-int ata_qc_issue(struct ata_queued_cmd *qc)
+unsigned int ata_qc_issue(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
@@ -3702,7 +3701,7 @@ int ata_qc_issue(struct ata_queued_cmd *
sg_err:
qc->flags &= ~ATA_QCFLAG_DMAMAP;
- return -1;
+ return AC_ERR_SYSTEM;
}
@@ -3721,10 +3720,10 @@ sg_err:
* spin_lock_irqsave(host_set lock)
*
* RETURNS:
- * Zero on success, negative on error.
+ * Zero on success, AC_ERR_* mask on failure
*/
-int ata_qc_issue_prot(struct ata_queued_cmd *qc)
+unsigned int ata_qc_issue_prot(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
@@ -3769,7 +3768,7 @@ int ata_qc_issue_prot(struct ata_queued_
default:
WARN_ON(1);
- return -1;
+ return AC_ERR_SYSTEM;
}
return 0;
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index c496309..95e3c27 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1322,10 +1322,9 @@ static void ata_scsi_translate(struct at
goto early_finish;
/* select device, send command to hardware */
- if (ata_qc_issue(qc)) {
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask = ata_qc_issue(qc);
+ if (qc->err_mask)
ata_qc_complete(qc);
- }
VPRINTK("EXIT\n");
return;
@@ -2044,10 +2043,9 @@ static void atapi_request_sense(struct a
qc->complete_fn = atapi_sense_complete;
- if (ata_qc_issue(qc)) {
- qc->err_mask |= AC_ERR_OTHER;
+ qc->err_mask = ata_qc_issue(qc);
+ if (qc->err_mask)
ata_qc_complete(qc);
- }
DPRINTK("EXIT\n");
}
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
index e03ce48..9d76923 100644
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -45,7 +45,7 @@ extern struct ata_queued_cmd *ata_qc_new
struct ata_device *dev);
extern int ata_rwcmd_protocol(struct ata_queued_cmd *qc);
extern void ata_qc_free(struct ata_queued_cmd *qc);
-extern int ata_qc_issue(struct ata_queued_cmd *qc);
+extern unsigned int ata_qc_issue(struct ata_queued_cmd *qc);
extern int ata_check_atapi_dma(struct ata_queued_cmd *qc);
extern void ata_dev_select(struct ata_port *ap, unsigned int device,
unsigned int wait, unsigned int can_sleep);
diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c
index e8df0c9..3a6bf58 100644
--- a/drivers/scsi/pdc_adma.c
+++ b/drivers/scsi/pdc_adma.c
@@ -131,7 +131,7 @@ static void adma_host_stop(struct ata_ho
static void adma_port_stop(struct ata_port *ap);
static void adma_phy_reset(struct ata_port *ap);
static void adma_qc_prep(struct ata_queued_cmd *qc);
-static int adma_qc_issue(struct ata_queued_cmd *qc);
+static unsigned int adma_qc_issue(struct ata_queued_cmd *qc);
static int adma_check_atapi_dma(struct ata_queued_cmd *qc);
static void adma_bmdma_stop(struct ata_queued_cmd *qc);
static u8 adma_bmdma_status(struct ata_port *ap);
@@ -419,7 +419,7 @@ static inline void adma_packet_start(str
writew(aPIOMD4 | aGO, chan + ADMA_CONTROL);
}
-static int adma_qc_issue(struct ata_queued_cmd *qc)
+static unsigned int adma_qc_issue(struct ata_queued_cmd *qc)
{
struct adma_port_priv *pp = qc->ap->private_data;
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index 89bcd85..281223a 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -328,7 +328,7 @@ static void mv_host_stop(struct ata_host
static int mv_port_start(struct ata_port *ap);
static void mv_port_stop(struct ata_port *ap);
static void mv_qc_prep(struct ata_queued_cmd *qc);
-static int mv_qc_issue(struct ata_queued_cmd *qc);
+static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
static irqreturn_t mv_interrupt(int irq, void *dev_instance,
struct pt_regs *regs);
static void mv_eng_timeout(struct ata_port *ap);
@@ -1040,7 +1040,7 @@ static void mv_qc_prep(struct ata_queued
* LOCKING:
* Inherited from caller.
*/
-static int mv_qc_issue(struct ata_queued_cmd *qc)
+static unsigned int mv_qc_issue(struct ata_queued_cmd *qc)
{
void __iomem *port_mmio = mv_ap_base(qc->ap);
struct mv_port_priv *pp = qc->ap->private_data;
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index b0b0a69..bac36d5 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -95,7 +95,7 @@ static void pdc_qc_prep(struct ata_queue
static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static void pdc_irq_clear(struct ata_port *ap);
-static int pdc_qc_issue_prot(struct ata_queued_cmd *qc);
+static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc);
static struct scsi_host_template pdc_ata_sht = {
@@ -544,7 +544,7 @@ static inline void pdc_packet_start(stru
readl((void __iomem *) ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT); /* flush */
}
-static int pdc_qc_issue_prot(struct ata_queued_cmd *qc)
+static unsigned int pdc_qc_issue_prot(struct ata_queued_cmd *qc)
{
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c
index de05e28..2afbeb7 100644
--- a/drivers/scsi/sata_qstor.c
+++ b/drivers/scsi/sata_qstor.c
@@ -120,7 +120,7 @@ static void qs_host_stop(struct ata_host
static void qs_port_stop(struct ata_port *ap);
static void qs_phy_reset(struct ata_port *ap);
static void qs_qc_prep(struct ata_queued_cmd *qc);
-static int qs_qc_issue(struct ata_queued_cmd *qc);
+static unsigned int qs_qc_issue(struct ata_queued_cmd *qc);
static int qs_check_atapi_dma(struct ata_queued_cmd *qc);
static void qs_bmdma_stop(struct ata_queued_cmd *qc);
static u8 qs_bmdma_status(struct ata_port *ap);
@@ -352,7 +352,7 @@ static inline void qs_packet_start(struc
readl(chan + QS_CCT_CFF); /* flush */
}
-static int qs_qc_issue(struct ata_queued_cmd *qc)
+static unsigned int qs_qc_issue(struct ata_queued_cmd *qc)
{
struct qs_port_priv *pp = qc->ap->private_data;
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index fb59012..5a7a7b1 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -251,7 +251,7 @@ static void sil24_scr_write(struct ata_p
static void sil24_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
static void sil24_phy_reset(struct ata_port *ap);
static void sil24_qc_prep(struct ata_queued_cmd *qc);
-static int sil24_qc_issue(struct ata_queued_cmd *qc);
+static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc);
static void sil24_irq_clear(struct ata_port *ap);
static void sil24_eng_timeout(struct ata_port *ap);
static irqreturn_t sil24_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
@@ -557,7 +557,7 @@ static void sil24_qc_prep(struct ata_que
sil24_fill_sg(qc, sge);
}
-static int sil24_qc_issue(struct ata_queued_cmd *qc)
+static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
void __iomem *port = (void __iomem *)ap->ioaddr.cmd_addr;
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index bc87c16..3175c6b 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -174,7 +174,7 @@ static void pdc20621_get_from_dimm(struc
static void pdc20621_put_to_dimm(struct ata_probe_ent *pe,
void *psource, u32 offset, u32 size);
static void pdc20621_irq_clear(struct ata_port *ap);
-static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc);
+static unsigned int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc);
static struct scsi_host_template pdc_sata_sht = {
@@ -678,7 +678,7 @@ static void pdc20621_packet_start(struct
}
}
-static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc)
+static unsigned int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc)
{
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8ff3a7f..b1ea2f9 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -427,7 +427,7 @@ struct ata_port_operations {
void (*bmdma_start) (struct ata_queued_cmd *qc);
void (*qc_prep) (struct ata_queued_cmd *qc);
- int (*qc_issue) (struct ata_queued_cmd *qc);
+ unsigned int (*qc_issue) (struct ata_queued_cmd *qc);
void (*eng_timeout) (struct ata_port *ap);
@@ -515,7 +515,7 @@ extern void ata_port_stop (struct ata_po
extern void ata_host_stop (struct ata_host_set *host_set);
extern irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
extern void ata_qc_prep(struct ata_queued_cmd *qc);
-extern int ata_qc_issue_prot(struct ata_queued_cmd *qc);
+extern unsigned int ata_qc_issue_prot(struct ata_queued_cmd *qc);
extern void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf,
unsigned int buflen);
extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (4 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 06/13] libata: return AC_ERR_* from issue functions Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 7:09 ` Jeff Garzik
` (2 more replies)
2006-01-23 4:09 ` [PATCH 01/13] libata: fold __ata_qc_complete() into ata_qc_free() Tejun Heo
` (6 subsequent siblings)
12 siblings, 3 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo, linux-scsi, James.Bottomley
Export two SCSI EH command handling functions. To be used by libata EH.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
James, this patch is a part of libata-EH related updates. After the
update, libata EH uses functions exported from this patch to retry or
complete failed commands.
drivers/scsi/scsi_error.c | 7 ++++---
include/scsi/scsi_eh.h | 3 +++
2 files changed, 7 insertions(+), 3 deletions(-)
296dd6345057f576a875e6a140f0fd11e2b1d54f
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a2333d2..6bac3d2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -584,8 +584,7 @@ static int scsi_request_sense(struct scs
* keep a list of pending commands for final completion, and once we
* are ready to leave error handling we handle completion for real.
**/
-static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
- struct list_head *done_q)
+void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
{
scmd->device->host->host_failed--;
scmd->eh_eflags = 0;
@@ -597,6 +596,7 @@ static void scsi_eh_finish_cmd(struct sc
scsi_setup_cmd_retry(scmd);
list_move_tail(&scmd->eh_entry, done_q);
}
+EXPORT_SYMBOL(scsi_eh_finish_cmd);
/**
* scsi_eh_get_sense - Get device sense data.
@@ -1425,7 +1425,7 @@ static void scsi_eh_ready_devs(struct Sc
* @done_q: list_head of processed commands.
*
**/
-static void scsi_eh_flush_done_q(struct list_head *done_q)
+void scsi_eh_flush_done_q(struct list_head *done_q)
{
struct scsi_cmnd *scmd, *next;
@@ -1454,6 +1454,7 @@ static void scsi_eh_flush_done_q(struct
}
}
}
+EXPORT_SYMBOL(scsi_eh_flush_done_q);
/**
* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index fabd879..d160880 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -35,6 +35,9 @@ static inline int scsi_sense_valid(struc
}
+extern void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
+ struct list_head *done_q);
+extern void scsi_eh_flush_done_q(struct list_head *done_q);
extern void scsi_report_bus_reset(struct Scsi_Host *, int);
extern void scsi_report_device_reset(struct Scsi_Host *, int, int);
extern int scsi_block_when_processing_errors(struct scsi_device *);
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 4:09 ` [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q() Tejun Heo
@ 2006-01-23 7:09 ` Jeff Garzik
2006-01-23 7:26 ` Arjan van de Ven
2006-01-23 14:52 ` Tejun Heo
2 siblings, 0 replies; 33+ messages in thread
From: Jeff Garzik @ 2006-01-23 7:09 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide, albertcc, linux-scsi, James.Bottomley
On Mon, Jan 23, 2006 at 01:09:36PM +0900, Tejun Heo wrote:
> Export two SCSI EH command handling functions. To be used by libata EH.
>
> Signed-off-by: Tejun Heo <htejun@gmail.com>
>
> ---
>
> James, this patch is a part of libata-EH related updates. After the
> update, libata EH uses functions exported from this patch to retry or
> complete failed commands.
ACK (obviously). We'll want to un-export scsi_finish_command() once
this is all done.
Jeff
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 4:09 ` [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q() Tejun Heo
2006-01-23 7:09 ` Jeff Garzik
@ 2006-01-23 7:26 ` Arjan van de Ven
2006-01-23 8:20 ` Tejun Heo
2006-01-23 14:52 ` Tejun Heo
2 siblings, 1 reply; 33+ messages in thread
From: Arjan van de Ven @ 2006-01-23 7:26 UTC (permalink / raw)
To: Tejun Heo; +Cc: James.Bottomley, linux-scsi, albertcc, linux-ide, jgarzik
On Mon, 2006-01-23 at 13:09 +0900, Tejun Heo wrote:
> Export two SCSI EH command handling functions. To be used by libata EH.
since these are pretty much internal, can we make these _GPL exports?
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 7:26 ` Arjan van de Ven
@ 2006-01-23 8:20 ` Tejun Heo
2006-01-23 9:36 ` Christoph Hellwig
0 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 8:20 UTC (permalink / raw)
To: Arjan van de Ven
Cc: James.Bottomley, linux-scsi, albertcc, linux-ide, jgarzik
Arjan van de Ven wrote:
> On Mon, 2006-01-23 at 13:09 +0900, Tejun Heo wrote:
>
>>Export two SCSI EH command handling functions. To be used by libata EH.
>
> since these are pretty much internal, can we make these _GPL exports?
>
It's SCSI developers' decision. I chose EXPORT_SYMBOL because all other
exported symbols in scsi_error.c were using it. James?
--
tejun
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 8:20 ` Tejun Heo
@ 2006-01-23 9:36 ` Christoph Hellwig
2006-01-23 10:05 ` Tejun Heo
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2006-01-23 9:36 UTC (permalink / raw)
To: Tejun Heo
Cc: Arjan van de Ven, James.Bottomley, linux-scsi, albertcc,
linux-ide, jgarzik
On Mon, Jan 23, 2006 at 05:20:42PM +0900, Tejun Heo wrote:
> Arjan van de Ven wrote:
> > On Mon, 2006-01-23 at 13:09 +0900, Tejun Heo wrote:
> >
> >>Export two SCSI EH command handling functions. To be used by libata EH.
> >
> > since these are pretty much internal, can we make these _GPL exports?
> >
>
> It's SCSI developers' decision. I chose EXPORT_SYMBOL because all other
> exported symbols in scsi_error.c were using it. James?
These are internal functions. If we're going to export them at all then
as _GPL (which is grossly misnamed and should be _INTERNAL)
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 9:36 ` Christoph Hellwig
@ 2006-01-23 10:05 ` Tejun Heo
2006-01-24 17:11 ` Luben Tuikov
0 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 10:05 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Arjan van de Ven, James.Bottomley, linux-scsi, albertcc,
linux-ide, jgarzik
Christoph Hellwig wrote:
> On Mon, Jan 23, 2006 at 05:20:42PM +0900, Tejun Heo wrote:
>
>>Arjan van de Ven wrote:
>>
>>>On Mon, 2006-01-23 at 13:09 +0900, Tejun Heo wrote:
>>>
>>>
>>>>Export two SCSI EH command handling functions. To be used by libata EH.
>>>
>>>since these are pretty much internal, can we make these _GPL exports?
>>>
>>
>>It's SCSI developers' decision. I chose EXPORT_SYMBOL because all other
>>exported symbols in scsi_error.c were using it. James?
>
>
> These are internal functions. If we're going to export them at all then
> as _GPL (which is grossly misnamed and should be _INTERNAL)
And from the other thread created by my mistake.
> What do you need it for? Please send the whole patch series (and
> generally anything in libata that interacts with the scsi midlayer) to
> linux-scsi, please.
It's rather large patchset consisting of 13 libata-eh related patches.
Only two of the patches are relevant to SCSI. I'm not sure whether
cross-posting the whole thing is appropriate.
The first one exports two scsi_eh functions (this thread) and the second
one uses these two to implement ata_eh_retry/complete helpers.
http://marc.theaimsgroup.com/?l=linux-ide&m=113798939719926&w=2
http://marc.theaimsgroup.com/?l=linux-ide&m=113798939627887&w=2
These are used because libata implements shost->eh_strategy_handler for
error handling. libata's ->eh_strategy_handler should do everything
scsi_unjam_host() does and one chunk is to retry or complete failed SCSI
commands during or after EH completes. libata used to do this by
calling scsi_finish_command() directly, which is an internal interface
too. Directly calling scsi_finish_command() also used to have deadlock
problem when libata shared SCSI's host lock.
As what has to be done is identical, above two patches make libata share
that part of code with scsi_unjam_host(). And, as Jeff commented, once
these are settled scsi_fini_command() can be unexported.
--
tejun
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 10:05 ` Tejun Heo
@ 2006-01-24 17:11 ` Luben Tuikov
2006-01-24 17:20 ` Arjan van de Ven
2006-01-24 17:30 ` Jeff Garzik
0 siblings, 2 replies; 33+ messages in thread
From: Luben Tuikov @ 2006-01-24 17:11 UTC (permalink / raw)
To: Tejun Heo, Christoph Hellwig
Cc: Arjan van de Ven, James.Bottomley, linux-scsi, albertcc,
linux-ide, jgarzik
--- Tejun Heo <htejun@gmail.com> wrote:
> Christoph Hellwig wrote:
> > On Mon, Jan 23, 2006 at 05:20:42PM +0900, Tejun Heo wrote:
> >
> >>Arjan van de Ven wrote:
> >>
> >>>On Mon, 2006-01-23 at 13:09 +0900, Tejun Heo wrote:
> >>>
> >>>
> >>>>Export two SCSI EH command handling functions. To be used by libata EH.
> >>>
> >>>since these are pretty much internal, can we make these _GPL exports?
> >>>
> >>
> >>It's SCSI developers' decision. I chose EXPORT_SYMBOL because all other
> >>exported symbols in scsi_error.c were using it. James?
> >
> >
> > These are internal functions. If we're going to export them at all then
> > as _GPL (which is grossly misnamed and should be _INTERNAL)
>
> And from the other thread created by my mistake.
>
> > What do you need it for? Please send the whole patch series (and
> > generally anything in libata that interacts with the scsi midlayer) to
> > linux-scsi, please.
>
> It's rather large patchset consisting of 13 libata-eh related patches.
> Only two of the patches are relevant to SCSI. I'm not sure whether
> cross-posting the whole thing is appropriate.
>
> The first one exports two scsi_eh functions (this thread) and the second
> one uses these two to implement ata_eh_retry/complete helpers.
>
> http://marc.theaimsgroup.com/?l=linux-ide&m=113798939719926&w=2
> http://marc.theaimsgroup.com/?l=linux-ide&m=113798939627887&w=2
>
> These are used because libata implements shost->eh_strategy_handler for
> error handling. libata's ->eh_strategy_handler should do everything
> scsi_unjam_host() does and one chunk is to retry or complete failed SCSI
> commands during or after EH completes. libata used to do this by
> calling scsi_finish_command() directly, which is an internal interface
> too. Directly calling scsi_finish_command() also used to have deadlock
> problem when libata shared SCSI's host lock.
>
> As what has to be done is identical, above two patches make libata share
> that part of code with scsi_unjam_host(). And, as Jeff commented, once
> these are settled scsi_fini_command() can be unexported.
Let me understand:
So when scsi_finish_command() is unexported, then _all_ EH strategies
would have to define local done_q, and splice eh_cmd_q into local work_q,
then from local work_q into local done_q and then that is passed
to SCSI Core, via scsi_eh_flush_done_q().
Is it possible that this is done asynchrounously? Since EH recovery is
per host, but you may have only a single device which is misbehaving?
I.e. is it possible, in a such and such EH strategy, not necessarily
SCSI Cores's or libata's, to _not_ have a local done_q? So that,
we have only local work_q (after splicing eh_cmd_q) and after each
command is looked at, we "return" that command back to SCSI Core, so
that SCSI Core can "finish" it. (In effect "per-command", as opposed to
"per-queue-of-commands".)
Alternatively, simulating this using one-command-per-local-done_q and
calling scsi_eh_flush_done_q() each time a single command is added
to local done_q would seem rather inefficient.
Luben
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-24 17:11 ` Luben Tuikov
@ 2006-01-24 17:20 ` Arjan van de Ven
2006-01-24 18:25 ` Luben Tuikov
2006-01-24 17:30 ` Jeff Garzik
1 sibling, 1 reply; 33+ messages in thread
From: Arjan van de Ven @ 2006-01-24 17:20 UTC (permalink / raw)
To: ltuikov
Cc: Tejun Heo, Christoph Hellwig, James.Bottomley, linux-scsi,
albertcc, linux-ide, jgarzik
On Tue, 2006-01-24 at 09:11 -0800, Luben Tuikov wrote:
> So when scsi_finish_command() is unexported, then _all_ EH strategies
> would have to define local done_q, and splice eh_cmd_q into local work_q,
> then from local work_q into local done_q and then that is passed
> to SCSI Core, via scsi_eh_flush_done_q().
custom error handling was supposed to have gone away already, but then
libata came around for now... (until that moves to be a block driver),
so.. I don't see the problem really.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-24 17:20 ` Arjan van de Ven
@ 2006-01-24 18:25 ` Luben Tuikov
0 siblings, 0 replies; 33+ messages in thread
From: Luben Tuikov @ 2006-01-24 18:25 UTC (permalink / raw)
To: Arjan van de Ven
Cc: Tejun Heo, Christoph Hellwig, James.Bottomley, linux-scsi,
albertcc, linux-ide, jgarzik
--- Arjan van de Ven <arjan@infradead.org> wrote:
> custom error handling was supposed to have gone away already, but then
> libata came around for now... (until that moves to be a block driver),
> so.. I don't see the problem really.
Shouldn't one want more "custom error handling"? That is, per protocol.
So in effect what you call "custom error handling" actually means
per protocol error handling, be it FC, SATA, iSCSI, USB, SAS, etc.
So SCSI Core offloads recovery to the protocol stack, which depending
on the transport/interconnect may offload some (but not all) functionality
to the port driver. (Case in point: tunneling protocols.)
Then you can have SCSI Core only decide whether to retry to command.
(If so asked by the submitter from uppper layer.)
Luben
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-24 17:11 ` Luben Tuikov
2006-01-24 17:20 ` Arjan van de Ven
@ 2006-01-24 17:30 ` Jeff Garzik
2006-01-24 18:53 ` Luben Tuikov
1 sibling, 1 reply; 33+ messages in thread
From: Jeff Garzik @ 2006-01-24 17:30 UTC (permalink / raw)
To: Luben Tuikov
Cc: Tejun Heo, Christoph Hellwig, Arjan van de Ven, James.Bottomley,
linux-scsi, albertcc, linux-ide
On Tue, Jan 24, 2006 at 09:11:17AM -0800, Luben Tuikov wrote:
> So when scsi_finish_command() is unexported, then _all_ EH strategies
> would have to define local done_q, and splice eh_cmd_q into local work_q,
> then from local work_q into local done_q and then that is passed
> to SCSI Core, via scsi_eh_flush_done_q().
>
> Is it possible that this is done asynchrounously? Since EH recovery is
> per host, but you may have only a single device which is misbehaving?
No, because the host is stopped during EH, and only restarted after your
EH code completes its run.
> I.e. is it possible, in a such and such EH strategy, not necessarily
> SCSI Cores's or libata's, to _not_ have a local done_q? So that,
> we have only local work_q (after splicing eh_cmd_q) and after each
> command is looked at, we "return" that command back to SCSI Core, so
> that SCSI Core can "finish" it. (In effect "per-command", as opposed to
> "per-queue-of-commands".)
done_q is simply a method for returning one or more commands back to the
SCSI Core, so that the SCSI Core can finish it.
Per-command is irrelevant during EH, because queue processing is not
restarted until your EH handler ends completely.
One alternative strategy for EH is to override the ->eh_timed_out()
function. Its ugly, but you may effectively eliminate EH handling via
->eh_strategy_handler(), by moving the error handling completely within
(a) the interrupt handler or (b) ->eh_timed_out().
Jeff
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-24 17:30 ` Jeff Garzik
@ 2006-01-24 18:53 ` Luben Tuikov
0 siblings, 0 replies; 33+ messages in thread
From: Luben Tuikov @ 2006-01-24 18:53 UTC (permalink / raw)
To: Jeff Garzik
Cc: Tejun Heo, Christoph Hellwig, Arjan van de Ven, James.Bottomley,
linux-scsi, albertcc, linux-ide
--- Jeff Garzik <jgarzik@pobox.com> wrote:
> On Tue, Jan 24, 2006 at 09:11:17AM -0800, Luben Tuikov wrote:
> > So when scsi_finish_command() is unexported, then _all_ EH strategies
> > would have to define local done_q, and splice eh_cmd_q into local work_q,
> > then from local work_q into local done_q and then that is passed
> > to SCSI Core, via scsi_eh_flush_done_q().
> >
> > Is it possible that this is done asynchrounously? Since EH recovery is
> > per host, but you may have only a single device which is misbehaving?
>
> No, because the host is stopped during EH, and only restarted after your
> EH code completes its run.
Look at my sentence above, I just said "Since EH recovery is per host". You're
repeating what I had just said.
Maybe I was too subtle saying "Since EH recovery is per host, but you may
have only a single device which is misbehaving": the whole point is not to have
to stop the "host" in order to do error recovery. Finer grained recovery would
be better, obviously for many reasons, both technologically and logistically.
> > I.e. is it possible, in a such and such EH strategy, not necessarily
> > SCSI Cores's or libata's, to _not_ have a local done_q? So that,
> > we have only local work_q (after splicing eh_cmd_q) and after each
> > command is looked at, we "return" that command back to SCSI Core, so
> > that SCSI Core can "finish" it. (In effect "per-command", as opposed to
> > "per-queue-of-commands".)
>
> done_q is simply a method for returning one or more commands back to the
> SCSI Core, so that the SCSI Core can finish it.
Thank you for pointing the obvious.
> Per-command is irrelevant during EH, because queue processing is not
> restarted until your EH handler ends completely.
Which would not be the case if finer grained recovery is to be attained.
> One alternative strategy for EH is to override the ->eh_timed_out()
> function. Its ugly, but you may effectively eliminate EH handling via
> ->eh_strategy_handler(), by moving the error handling completely within
> (a) the interrupt handler or (b) ->eh_timed_out().
Neither. There's per protocol recovery EHs which override both
eh_timed_out() _and_ eh_strategy_handler(), in order to implement
protocol recovery.
Luben
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
2006-01-23 4:09 ` [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q() Tejun Heo
2006-01-23 7:09 ` Jeff Garzik
2006-01-23 7:26 ` Arjan van de Ven
@ 2006-01-23 14:52 ` Tejun Heo
2 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 14:52 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc, hch, arjan; +Cc: linux-scsi, James.Bottomley
[PATCH] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q()
Export two SCSI EH command handling functions. To be used by libata EH.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
Respin of the patch with EXPORT_SYMBOL_GPL.
Index: work1/drivers/scsi/scsi_error.c
===================================================================
--- work1.orig/drivers/scsi/scsi_error.c 2006-01-23 23:48:55.000000000 +0900
+++ work1/drivers/scsi/scsi_error.c 2006-01-23 23:49:32.000000000 +0900
@@ -584,8 +584,7 @@ static int scsi_request_sense(struct scs
* keep a list of pending commands for final completion, and once we
* are ready to leave error handling we handle completion for real.
**/
-static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
- struct list_head *done_q)
+void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
{
scmd->device->host->host_failed--;
scmd->eh_eflags = 0;
@@ -597,6 +596,7 @@ static void scsi_eh_finish_cmd(struct sc
scsi_setup_cmd_retry(scmd);
list_move_tail(&scmd->eh_entry, done_q);
}
+EXPORT_SYMBOL_GPL(scsi_eh_finish_cmd);
/**
* scsi_eh_get_sense - Get device sense data.
@@ -1425,7 +1425,7 @@ static void scsi_eh_ready_devs(struct Sc
* @done_q: list_head of processed commands.
*
**/
-static void scsi_eh_flush_done_q(struct list_head *done_q)
+void scsi_eh_flush_done_q(struct list_head *done_q)
{
struct scsi_cmnd *scmd, *next;
@@ -1454,6 +1454,7 @@ static void scsi_eh_flush_done_q(struct
}
}
}
+EXPORT_SYMBOL_GPL(scsi_eh_flush_done_q);
/**
* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
Index: work1/include/scsi/scsi_eh.h
===================================================================
--- work1.orig/include/scsi/scsi_eh.h 2006-01-23 23:48:55.000000000 +0900
+++ work1/include/scsi/scsi_eh.h 2006-01-23 23:49:02.000000000 +0900
@@ -35,6 +35,9 @@ static inline int scsi_sense_valid(struc
}
+extern void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
+ struct list_head *done_q);
+extern void scsi_eh_flush_done_q(struct list_head *done_q);
extern void scsi_report_bus_reset(struct Scsi_Host *, int);
extern void scsi_report_device_reset(struct Scsi_Host *, int, int);
extern int scsi_block_when_processing_errors(struct scsi_device *);
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 01/13] libata: fold __ata_qc_complete() into ata_qc_free()
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (5 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 07/13] SCSI: export scsi_eh_finish_cmd() and scsi_eh_flush_done_q() Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-27 3:34 ` Jeff Garzik
2006-01-23 4:09 ` [PATCH 08/13] libata: implement and apply ata_eh_qc_complete/retry() Tejun Heo
` (5 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
All ata_qc_free() does is calling __ata_qc_complete() which isn't used
anywhere else. Fold __ata_qc_complete() into ata_qc_free().
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-core.c | 30 ++++++++++++------------------
1 files changed, 12 insertions(+), 18 deletions(-)
62789ae9af2f157a4eb4dfcdb3ab2425adb9cc86
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index a07bd35..0e49323 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -73,7 +73,6 @@ static int fgb(u32 bitmap);
static int ata_choose_xfer_mode(const struct ata_port *ap,
u8 *xfer_mode_out,
unsigned int *xfer_shift_out);
-static void __ata_qc_complete(struct ata_queued_cmd *qc);
static unsigned int ata_unique_id = 1;
static struct workqueue_struct *ata_wq;
@@ -3593,21 +3592,6 @@ struct ata_queued_cmd *ata_qc_new_init(s
return qc;
}
-static void __ata_qc_complete(struct ata_queued_cmd *qc)
-{
- struct ata_port *ap = qc->ap;
- unsigned int tag;
-
- qc->flags = 0;
- tag = qc->tag;
- if (likely(ata_tag_valid(tag))) {
- if (tag == ap->active_tag)
- ap->active_tag = ATA_TAG_POISON;
- qc->tag = ATA_TAG_POISON;
- clear_bit(tag, &ap->qactive);
- }
-}
-
/**
* ata_qc_free - free unused ata_queued_cmd
* @qc: Command to complete
@@ -3620,9 +3604,19 @@ static void __ata_qc_complete(struct ata
*/
void ata_qc_free(struct ata_queued_cmd *qc)
{
+ struct ata_port *ap = qc->ap;
+ unsigned int tag;
+
assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
- __ata_qc_complete(qc);
+ qc->flags = 0;
+ tag = qc->tag;
+ if (likely(ata_tag_valid(tag))) {
+ if (tag == ap->active_tag)
+ ap->active_tag = ATA_TAG_POISON;
+ qc->tag = ATA_TAG_POISON;
+ clear_bit(tag, &ap->qactive);
+ }
}
/**
@@ -3662,7 +3656,7 @@ void ata_qc_complete(struct ata_queued_c
if (rc != 0)
return;
- __ata_qc_complete(qc);
+ ata_qc_free(qc);
VPRINTK("EXIT\n");
}
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 08/13] libata: implement and apply ata_eh_qc_complete/retry()
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (6 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 01/13] libata: fold __ata_qc_complete() into ata_qc_free() Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 09/13] libata: fix handling of race between timeout and completion Tejun Heo
` (4 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
Implement ata_eh_qc_complete/retry() using scsi_eh_finish_cmd() and
scsi_eh_flush_done_q(). This removes all eh scsicmd finish hacks from
low level drivers.
This change was first suggested by Jeff Garzik.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/ahci.c | 12 ++-------
drivers/scsi/libata-core.c | 14 ++++------
drivers/scsi/libata-scsi.c | 59 +++++++++++++++++++++++++++++++++++++++----
drivers/scsi/sata_mv.c | 12 +--------
drivers/scsi/sata_promise.c | 12 +--------
drivers/scsi/sata_sil24.c | 10 +------
drivers/scsi/sata_sx4.c | 12 +--------
include/linux/libata.h | 3 ++
8 files changed, 70 insertions(+), 64 deletions(-)
2d2e7f62009749f8f16816247364ccdccf279a6e
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index 0f6e4dd..5a6b230 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -672,19 +672,13 @@ static void ahci_eng_timeout(struct ata_
ap->id);
} else {
ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
-
- /* hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
- qc->scsidone = scsi_finish_command;
qc->err_mask |= AC_ERR_TIMEOUT;
- ata_qc_complete(qc);
}
spin_unlock_irqrestore(&host_set->lock, flags);
+
+ if (qc)
+ ata_eh_qc_complete(qc);
}
static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index b29bf0d..93ae2dc 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3449,14 +3449,6 @@ static void ata_qc_timeout(struct ata_qu
spin_lock_irqsave(&host_set->lock, flags);
- /* hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
- qc->scsidone = scsi_finish_command;
-
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
@@ -3480,12 +3472,13 @@ static void ata_qc_timeout(struct ata_qu
/* complete taskfile transaction */
qc->err_mask |= ac_err_mask(drv_stat);
- ata_qc_complete(qc);
break;
}
spin_unlock_irqrestore(&host_set->lock, flags);
+ ata_eh_qc_complete(qc);
+
DPRINTK("EXIT\n");
}
@@ -4422,6 +4415,7 @@ static void ata_host_init(struct ata_por
INIT_WORK(&ap->packet_task, atapi_packet_task, ap);
INIT_WORK(&ap->pio_task, ata_pio_task, ap);
+ INIT_LIST_HEAD(&ap->eh_done_q);
for (i = 0; i < ATA_MAX_DEVICES; i++)
ap->device[i].devno = i;
@@ -5165,6 +5159,8 @@ EXPORT_SYMBOL_GPL(ata_dev_classify);
EXPORT_SYMBOL_GPL(ata_dev_id_string);
EXPORT_SYMBOL_GPL(ata_dev_config);
EXPORT_SYMBOL_GPL(ata_scsi_simulate);
+EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
+EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
EXPORT_SYMBOL_GPL(ata_timing_compute);
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 95e3c27..ab6b533 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -738,17 +738,64 @@ int ata_scsi_error(struct Scsi_Host *hos
ap = (struct ata_port *) &host->hostdata[0];
ap->ops->eng_timeout(ap);
- /* TODO: this is per-command; when queueing is supported
- * this code will either change or move to a more
- * appropriate place
- */
- host->host_failed--;
- INIT_LIST_HEAD(&host->eh_cmd_q);
+ assert(host->host_failed == 0 && list_empty(&host->eh_cmd_q));
+
+ scsi_eh_flush_done_q(&ap->eh_done_q);
DPRINTK("EXIT\n");
return 0;
}
+static void ata_eh_scsidone(struct scsi_cmnd *scmd)
+{
+ /* nada */
+}
+
+static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ struct scsi_cmnd *scmd = qc->scsicmd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ap->host_set->lock, flags);
+ qc->scsidone = ata_eh_scsidone;
+ ata_qc_complete(qc);
+ assert(!ata_tag_valid(qc->tag));
+ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+ scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+}
+
+/**
+ * ata_eh_qc_complete - Complete an active ATA command from EH
+ * @qc: Command to complete
+ *
+ * Indicate to the mid and upper layers that an ATA command has
+ * completed. To be used from EH.
+ */
+void ata_eh_qc_complete(struct ata_queued_cmd *qc)
+{
+ struct scsi_cmnd *scmd = qc->scsicmd;
+ scmd->retries = scmd->allowed;
+ __ata_eh_qc_complete(qc);
+}
+
+/**
+ * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
+ * @qc: Command to retry
+ *
+ * Indicate to the mid and upper layers that an ATA command
+ * should be retried. To be used from EH.
+ *
+ * SCSI midlayer limits the number of retries to scmd->allowed.
+ * This function might need to adjust scmd->retries for commands
+ * which get retried due to unrelated NCQ failures.
+ */
+void ata_eh_qc_retry(struct ata_queued_cmd *qc)
+{
+ __ata_eh_qc_complete(qc);
+}
+
/**
* ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command
* @qc: Storage for translated ATA taskfile
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index 281223a..d9a554c 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -1839,7 +1839,6 @@ static void mv_phy_reset(struct ata_port
static void mv_eng_timeout(struct ata_port *ap)
{
struct ata_queued_cmd *qc;
- unsigned long flags;
printk(KERN_ERR "ata%u: Entering mv_eng_timeout\n",ap->id);
DPRINTK("All regs @ start of eng_timeout\n");
@@ -1858,17 +1857,8 @@ static void mv_eng_timeout(struct ata_po
printk(KERN_ERR "ata%u: BUG: timeout without command\n",
ap->id);
} else {
- /* hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
- spin_lock_irqsave(&ap->host_set->lock, flags);
- qc->scsidone = scsi_finish_command;
qc->err_mask |= AC_ERR_TIMEOUT;
- ata_qc_complete(qc);
- spin_unlock_irqrestore(&ap->host_set->lock, flags);
+ ata_eh_qc_complete(qc);
}
}
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index bac36d5..77ef646 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -400,21 +400,12 @@ static void pdc_eng_timeout(struct ata_p
goto out;
}
- /* hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
- qc->scsidone = scsi_finish_command;
-
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
case ATA_PROT_NODATA:
printk(KERN_ERR "ata%u: command timeout\n", ap->id);
drv_stat = ata_wait_idle(ap);
qc->err_mask |= __ac_err_mask(drv_stat);
- ata_qc_complete(qc);
break;
default:
@@ -424,12 +415,13 @@ static void pdc_eng_timeout(struct ata_p
ap->id, qc->tf.command, drv_stat);
qc->err_mask |= ac_err_mask(drv_stat);
- ata_qc_complete(qc);
break;
}
out:
spin_unlock_irqrestore(&host_set->lock, flags);
+ if (qc)
+ ata_eh_qc_complete(qc);
DPRINTK("EXIT\n");
}
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index 5a7a7b1..7222fc7 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -644,17 +644,9 @@ static void sil24_eng_timeout(struct ata
return;
}
- /*
- * hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
printk(KERN_ERR "ata%u: command timeout\n", ap->id);
- qc->scsidone = scsi_finish_command;
qc->err_mask |= AC_ERR_TIMEOUT;
- ata_qc_complete(qc);
+ ata_eh_qc_complete(qc);
sil24_reset_controller(ap);
}
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index 3175c6b..9f992fb 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -872,20 +872,11 @@ static void pdc_eng_timeout(struct ata_p
goto out;
}
- /* hack alert! We cannot use the supplied completion
- * function from inside the ->eh_strategy_handler() thread.
- * libata is the only user of ->eh_strategy_handler() in
- * any kernel, so the default scsi_done() assumes it is
- * not being called from the SCSI EH.
- */
- qc->scsidone = scsi_finish_command;
-
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
case ATA_PROT_NODATA:
printk(KERN_ERR "ata%u: command timeout\n", ap->id);
qc->err_mask |= __ac_err_mask(ata_wait_idle(ap));
- ata_qc_complete(qc);
break;
default:
@@ -895,12 +886,13 @@ static void pdc_eng_timeout(struct ata_p
ap->id, qc->tf.command, drv_stat);
qc->err_mask |= ac_err_mask(drv_stat);
- ata_qc_complete(qc);
break;
}
out:
spin_unlock_irqrestore(&host_set->lock, flags);
+ if (qc)
+ ata_eh_qc_complete(qc);
DPRINTK("EXIT\n");
}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index b1ea2f9..576788d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -398,6 +398,7 @@ struct ata_port {
unsigned long pio_task_timeout;
u32 msg_enable;
+ struct list_head eh_done_q;
void *private_data;
};
@@ -490,6 +491,8 @@ extern int ata_scsi_detect(struct scsi_h
extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *));
extern int ata_scsi_error(struct Scsi_Host *host);
+extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
+extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
extern int ata_scsi_release(struct Scsi_Host *host);
extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
extern int ata_scsi_device_resume(struct scsi_device *);
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 09/13] libata: fix handling of race between timeout and completion
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (7 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 08/13] libata: implement and apply ata_eh_qc_complete/retry() Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-27 3:55 ` Jeff Garzik
2006-01-23 4:09 ` [PATCH 11/13] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
` (3 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
If a qc completes after SCSI timer expires but before libata EH kicks
in, the qc gets completed but the scsicmd still gets passed to libata
EH resulting in ->eng_timeout invocation with NULL qc. Currently none
of ->eng_timeout callbacks handles this properly. This patch makes
ata_scsi_error() bypass ->eng_timeout and handle this rare case.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-scsi.c | 42 +++++++++++++++++++++++++++++++++++++++---
1 files changed, 39 insertions(+), 3 deletions(-)
f18dbf3ad39650804ac9d7aee26f3d7a2d4a78b4
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index ab6b533..accb63a 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -731,12 +731,48 @@ int ata_scsi_slave_config(struct scsi_de
int ata_scsi_error(struct Scsi_Host *host)
{
- struct ata_port *ap;
+ struct ata_port *ap = (struct ata_port *) &host->hostdata[0];
+ struct ata_queued_cmd *qc;
+ unsigned long flags;
DPRINTK("ENTER\n");
- ap = (struct ata_port *) &host->hostdata[0];
- ap->ops->eng_timeout(ap);
+ spin_lock_irqsave(&ap->host_set->lock, flags);
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+ if (qc) {
+ ap->ops->eng_timeout(ap);
+ } else {
+ struct scsi_cmnd *scmd;
+ unsigned char *sb;
+
+ /* The scmd had timed out but the corresponding qc
+ * completed successfully inbetween timer expiration
+ * and here. Retry if possible.
+ *
+ * It is better to enter eng_timeout and perform EH
+ * before retrying the command, but this case should
+ * be _very_ rare and eng_timeout isn't ready for
+ * NULL-qc case.
+ */
+ scmd = list_entry(host->eh_cmd_q.next,
+ struct scsi_cmnd, eh_entry);
+ sb = scmd->sense_buffer;
+
+ /* Timeout, fake parity for now */
+ scmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+ sb[0] = 0x70;
+ sb[7] = 0x0a;
+ sb[2] = ABORTED_COMMAND;
+ sb[12] = 0x47;
+ sb[13] = 0x00;
+
+ printk(KERN_WARNING "ata%u: interrupt and timer raced for "
+ "scsicmd %p\n", ap->id, scmd);
+
+ scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+ }
assert(host->host_failed == 0 && list_empty(&host->eh_cmd_q));
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 09/13] libata: fix handling of race between timeout and completion
2006-01-23 4:09 ` [PATCH 09/13] libata: fix handling of race between timeout and completion Tejun Heo
@ 2006-01-27 3:55 ` Jeff Garzik
2006-01-27 3:58 ` Jeff Garzik
2006-02-01 15:36 ` Tejun Heo
0 siblings, 2 replies; 33+ messages in thread
From: Jeff Garzik @ 2006-01-27 3:55 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide, albertcc
Tejun Heo wrote:
> If a qc completes after SCSI timer expires but before libata EH kicks
> in, the qc gets completed but the scsicmd still gets passed to libata
> EH resulting in ->eng_timeout invocation with NULL qc. Currently none
> of ->eng_timeout callbacks handles this properly. This patch makes
> ata_scsi_error() bypass ->eng_timeout and handle this rare case.
>
> Signed-off-by: Tejun Heo <htejun@gmail.com>
Doesn't look right to me. If the qc gets completed, then the SCSI
command should have also been completed.
AFAICS the race is this: if the qc is completed, then there is no
active tag, so ata_qc_from_tag() correctly returns NULL. This case is
really an "error has already been handled" case. It looks really wrong
to even touch a scsicmd. I would just clear the eh_cmd list and be done
with it.
Jeff
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 09/13] libata: fix handling of race between timeout and completion
2006-01-27 3:55 ` Jeff Garzik
@ 2006-01-27 3:58 ` Jeff Garzik
2006-02-01 15:36 ` Tejun Heo
1 sibling, 0 replies; 33+ messages in thread
From: Jeff Garzik @ 2006-01-27 3:58 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide, albertcc
Jeff Garzik wrote:
> Tejun Heo wrote:
>
>> If a qc completes after SCSI timer expires but before libata EH kicks
>> in, the qc gets completed but the scsicmd still gets passed to libata
>> EH resulting in ->eng_timeout invocation with NULL qc. Currently none
>> of ->eng_timeout callbacks handles this properly. This patch makes
>> ata_scsi_error() bypass ->eng_timeout and handle this rare case.
>>
>> Signed-off-by: Tejun Heo <htejun@gmail.com>
>
>
> Doesn't look right to me. If the qc gets completed, then the SCSI
> command should have also been completed.
>
> AFAICS the race is this: if the qc is completed, then there is no
> active tag, so ata_qc_from_tag() correctly returns NULL. This case is
> really an "error has already been handled" case. It looks really wrong
> to even touch a scsicmd. I would just clear the eh_cmd list and be done
> with it.
dropped patches 9-10
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 09/13] libata: fix handling of race between timeout and completion
2006-01-27 3:55 ` Jeff Garzik
2006-01-27 3:58 ` Jeff Garzik
@ 2006-02-01 15:36 ` Tejun Heo
2006-02-09 6:21 ` Jeff Garzik
1 sibling, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-02-01 15:36 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide, albertcc
[-- Attachment #1: Type: text/plain, Size: 5272 bytes --]
Hello, Jeff.
On Thu, Jan 26, 2006 at 10:55:59PM -0500, Jeff Garzik wrote:
> Tejun Heo wrote:
> >If a qc completes after SCSI timer expires but before libata EH kicks
> >in, the qc gets completed but the scsicmd still gets passed to libata
> >EH resulting in ->eng_timeout invocation with NULL qc. Currently none
> >of ->eng_timeout callbacks handles this properly. This patch makes
> >ata_scsi_error() bypass ->eng_timeout and handle this rare case.
> >
> >Signed-off-by: Tejun Heo <htejun@gmail.com>
>
> Doesn't look right to me. If the qc gets completed, then the SCSI
> command should have also been completed.
>
> AFAICS the race is this: if the qc is completed, then there is no
> active tag, so ata_qc_from_tag() correctly returns NULL. This case is
> really an "error has already been handled" case. It looks really wrong
> to even touch a scsicmd. I would just clear the eh_cmd list and be done
> with it.
>
SCSI EH never invokes ->eh_strategy_handler without any failed
scsicmd. The only case where qc <-> scmd mismatch occurs is the
following case.
1. SCSI issues command (timer activated)
2. libata interprets and issues
3. time passes
4. SCSI timeout occurs, scmd queued for EH
5. irq occurs, completes qc. ata_qc_complete() calls qc->complete_fn
which ends up calling scsi_done(). scsi_done() sees that SCSI
timer has already expired, so it returns without doing anything
expecting EH to clean up.
6. SCSI EH kicks in, libata ->eh_strategy_handler (ata_scsi_error)
invoked.
7. in libata EH, we have no active qc but scmds to recover.
This can be reproduced by delaying completion of a qc by 31 secs and
EH of the command by 5 secs.
@30: timeout kicks in and invoked EH sleeps for 5secs
@31: delayed qc completion
@35: EH continues
With the current upstream branch, the following is the result of such
race.
ata1: inducing race condition
ata1: delaying completion of f774c818 by 31 secs
ata1: EH invoked for delayed qc f774c818, sleeping 5secs
ata1: delayed completion of f774c818
ata1: woke up, proceeding with EH
ata1: handling error/timeout
ata1: BUG: timeout without command
Assertion failed! host->host_failed == 0 && list_empty(&host->eh_cmd_q),drivers/
scsi/libata-scsi.c,ata_scsi_error,line=751
ata1: handling error/timeout
ata1: BUG: timeout without command
Assertion failed! host->host_failed == 0 && list_empty(&host->eh_cmd_q),drivers/
scsi/libata-scsi.c,ata_scsi_error,line=751
ata1: handling error/timeout
ata1: BUG: timeout without command
Assertion failed! host->host_failed == 0 && list_empty(&host->eh_cmd_q),drivers/
scsi/libata-scsi.c,ata_scsi_error,line=751
ata1: handling error/timeout
ata1: BUG: timeout without command
Assertion failed! host->host_failed == 0 && list_empty(&host->eh_cmd_q),drivers/
scsi/libata-scsi.c,ata_scsi_error,line=751
... repeat ...
What happens is, eh_cmd_q and host_failed aren't cleared properly and
scsi_error_handler() ends up looping forever.
If host->eh_cmd_q and host->host_failed are cleared as you suggested,
the following happens.
ata1: inducing race condition
ata1: delaying completion of f774c818 by 31 secs
ata1: EH invoked for delayed qc f774c818, sleeping 5secs
ata1: delayed completion of f774c818
ata1: woke up, proceeding with EH
<< the drive is locked up >>
As eh_cmd_q and host_failed are cleared, scsi_error_handler() goes to
sleep but the failed scmd is not actually finished. From SCSI's POV,
it's still out there (host_busy and device_busy haven't been decreased
yet). As our queue depth is 1, no other command will ever get issued
again.
With the patch I submitted, the result looks like...
ata1: inducing race condition
ata1: delaying completion of f774b818 by 31 secs
ata1: EH invoked for delayed qc f774b818, sleeping 5secs
ata1: delayed completion of f774b818
ata1: woke up, proceeding with EH
ata1: interrupt and timer raced for scsicmd f774943c
ata1: inducing race condition
ata1: delaying completion of f774b818 by 31 secs
ata1: EH invoked for delayed qc f774b818, sleeping 5secs
ata1: delayed completion of f774b818
ata1: woke up, proceeding with EH
ata1: interrupt and timer raced for scsicmd f774919c
ata1: inducing race condition
ata1: delaying completion of f774b818 by 31 secs
ata1: EH invoked for delayed qc f774b818, sleeping 5secs
ata1: delayed completion of f774b818
ata1: woke up, proceeding with EH
ata1: interrupt and timer raced for scsicmd f774943c
It recovers and IO continues.
The problem is that there are race conditions which make scmd's
lifetime and qc's lifetime different. qc ends up dying earlier than
its scmd and in the process we also lose completion status. The only
thing that can/must be done is retrying or failing the scmd.
Note that altough this race condition should be very rare but with NCQ
the likelihood increases as EH waits until all commands are settled
after a qc times out. This widens the window for such race condition
considerably.
I'm attaching four patches used for the above test cases.
* race-patch : race inducing patch on top of upstream
* jeff-patch : clear eh_cmd_q and host_failed only on race
* jeff-race-patch : race inducing patch on top of jeff-patch
* tj-race-patch : race inducing patch on top of this fix
--
tejun
[-- Attachment #2: race-patch --]
[-- Type: text/plain, Size: 2232 bytes --]
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 249e67f..d3bc7c1 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3919,6 +3919,13 @@ void ata_qc_free(struct ata_queued_cmd *
}
}
+static void delayed_qc_complete(unsigned long arg)
+{
+ struct ata_queued_cmd *qc = (void *)arg;
+ printk(KERN_ERR "ata%u: delayed completion of %p\n", qc->ap->id, qc);
+ ata_qc_complete(qc);
+}
+
/**
* ata_qc_complete - Complete an active ATA command
* @qc: Command to complete
@@ -3936,6 +3943,17 @@ void ata_qc_complete(struct ata_queued_c
assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
assert(qc->flags & ATA_QCFLAG_ACTIVE);
+ if (qc->flags & (1 << 31) && !(qc->flags & (1 << 30))) {
+ static struct timer_list timer;
+ printk(KERN_ERR "ata%u: delaying completion of %p by 31 secs\n",
+ qc->ap->id, qc);
+ qc->flags |= 1 << 30;
+ setup_timer(&timer, delayed_qc_complete, (unsigned long)qc);
+ timer.expires = jiffies + 31 * HZ;
+ add_timer(&timer);
+ return;
+ }
+
if (likely(qc->flags & ATA_QCFLAG_DMAMAP))
ata_sg_clean(qc);
@@ -4010,6 +4028,15 @@ unsigned int ata_qc_issue(struct ata_que
qc->ap->active_tag = qc->tag;
qc->flags |= ATA_QCFLAG_ACTIVE;
+ {
+ static int cnt = 0;
+ if (++cnt % 1000 == 0) {
+ printk(KERN_ERR "ata%u: inducing race condition\n",
+ ap->id);
+ qc->flags |= (1 << 31);
+ }
+ }
+
return ap->ops->qc_issue(qc);
sg_err:
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 6df8293..53a7c84 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -732,10 +732,20 @@ int ata_scsi_slave_config(struct scsi_de
int ata_scsi_error(struct Scsi_Host *host)
{
struct ata_port *ap;
+ struct ata_queued_cmd *qc;
DPRINTK("ENTER\n");
ap = (struct ata_port *) &host->hostdata[0];
+
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+ if (qc && (qc->flags & (1 << 31))) {
+ printk(KERN_ERR "ata%u: EH invoked for delayed qc %p, sleeping 5secs\n",
+ ap->id, qc);
+ msleep(5000);
+ printk(KERN_ERR "ata%u: woke up, proceeding with EH\n", ap->id);
+ }
+
ap->ops->eng_timeout(ap);
assert(host->host_failed == 0 && list_empty(&host->eh_cmd_q));
[-- Attachment #3: jeff-patch --]
[-- Type: text/plain, Size: 657 bytes --]
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 6df8293..17920d1 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -732,11 +732,19 @@ int ata_scsi_slave_config(struct scsi_de
int ata_scsi_error(struct Scsi_Host *host)
{
struct ata_port *ap;
+ struct ata_queued_cmd *qc;
DPRINTK("ENTER\n");
ap = (struct ata_port *) &host->hostdata[0];
- ap->ops->eng_timeout(ap);
+
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+ if (qc)
+ ap->ops->eng_timeout(ap);
+ else {
+ host->host_failed = 0;
+ INIT_LIST_HEAD(&host->eh_cmd_q);
+ }
assert(host->host_failed == 0 && list_empty(&host->eh_cmd_q));
[-- Attachment #4: jeff-race-patch --]
[-- Type: text/plain, Size: 1957 bytes --]
diff -u b/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
--- b/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -739,6 +739,14 @@
ap = (struct ata_port *) &host->hostdata[0];
qc = ata_qc_from_tag(ap, ap->active_tag);
+ if (qc && (qc->flags & (1 << 31))) {
+ printk(KERN_ERR "ata%u: EH invoked for delayed qc %p, sleeping 5secs\n",
+ ap->id, qc);
+ msleep(5000);
+ printk(KERN_ERR "ata%u: woke up, proceeding with EH\n", ap->id);
+ }
+
+ qc = ata_qc_from_tag(ap, ap->active_tag);
if (qc)
ap->ops->eng_timeout(ap);
else {
only in patch2:
unchanged:
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3919,6 +3919,13 @@ void ata_qc_free(struct ata_queued_cmd *
}
}
+static void delayed_qc_complete(unsigned long arg)
+{
+ struct ata_queued_cmd *qc = (void *)arg;
+ printk(KERN_ERR "ata%u: delayed completion of %p\n", qc->ap->id, qc);
+ ata_qc_complete(qc);
+}
+
/**
* ata_qc_complete - Complete an active ATA command
* @qc: Command to complete
@@ -3936,6 +3943,17 @@ void ata_qc_complete(struct ata_queued_c
assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
assert(qc->flags & ATA_QCFLAG_ACTIVE);
+ if (qc->flags & (1 << 31) && !(qc->flags & (1 << 30))) {
+ static struct timer_list timer;
+ printk(KERN_ERR "ata%u: delaying completion of %p by 31 secs\n",
+ qc->ap->id, qc);
+ qc->flags |= 1 << 30;
+ setup_timer(&timer, delayed_qc_complete, (unsigned long)qc);
+ timer.expires = jiffies + 31 * HZ;
+ add_timer(&timer);
+ return;
+ }
+
if (likely(qc->flags & ATA_QCFLAG_DMAMAP))
ata_sg_clean(qc);
@@ -4010,6 +4028,15 @@ unsigned int ata_qc_issue(struct ata_que
qc->ap->active_tag = qc->tag;
qc->flags |= ATA_QCFLAG_ACTIVE;
+ {
+ static int cnt = 0;
+ if (++cnt % 1000 == 0) {
+ printk(KERN_ERR "ata%u: inducing race condition\n",
+ ap->id);
+ qc->flags |= (1 << 31);
+ }
+ }
+
return ap->ops->qc_issue(qc);
sg_err:
[-- Attachment #5: tj-race-patch --]
[-- Type: text/plain, Size: 2310 bytes --]
Index: work1/drivers/scsi/libata-core.c
===================================================================
--- work1.orig/drivers/scsi/libata-core.c 2006-02-01 23:29:00.000000000 +0900
+++ work1/drivers/scsi/libata-core.c 2006-02-01 23:29:20.000000000 +0900
@@ -3959,6 +3959,13 @@ void ata_qc_free(struct ata_queued_cmd *
}
}
+static void delayed_qc_complete(unsigned long arg)
+{
+ struct ata_queued_cmd *qc = (void *)arg;
+ printk(KERN_ERR "ata%u: delayed completion of %p\n", qc->ap->id, qc);
+ ata_qc_complete(qc);
+}
+
/**
* ata_qc_complete - Complete an active ATA command
* @qc: Command to complete
@@ -3976,6 +3983,17 @@ void ata_qc_complete(struct ata_queued_c
assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */
assert(qc->flags & ATA_QCFLAG_ACTIVE);
+ if (qc->flags & (1 << 31) && !(qc->flags & (1 << 30))) {
+ static struct timer_list timer;
+ printk(KERN_ERR "ata%u: delaying completion of %p by 31 secs\n",
+ qc->ap->id, qc);
+ qc->flags |= 1 << 30;
+ setup_timer(&timer, delayed_qc_complete, (unsigned long)qc);
+ timer.expires = jiffies + 31 * HZ;
+ add_timer(&timer);
+ return;
+ }
+
if (likely(qc->flags & ATA_QCFLAG_DMAMAP))
ata_sg_clean(qc);
@@ -4050,6 +4068,15 @@ unsigned int ata_qc_issue(struct ata_que
qc->ap->active_tag = qc->tag;
qc->flags |= ATA_QCFLAG_ACTIVE;
+ {
+ static int cnt = 0;
+ if (++cnt % 1000 == 0) {
+ printk(KERN_ERR "ata%u: inducing race condition\n",
+ ap->id);
+ qc->flags |= (1 << 31);
+ }
+ }
+
return ap->ops->qc_issue(qc);
sg_err:
Index: work1/drivers/scsi/libata-scsi.c
===================================================================
--- work1.orig/drivers/scsi/libata-scsi.c 2006-02-01 23:29:00.000000000 +0900
+++ work1/drivers/scsi/libata-scsi.c 2006-02-01 23:29:50.000000000 +0900
@@ -737,6 +737,14 @@ int ata_scsi_error(struct Scsi_Host *hos
DPRINTK("ENTER\n");
+ qc = ata_qc_from_tag(ap, ap->active_tag);
+ if (qc && (qc->flags & (1 << 31))) {
+ printk(KERN_ERR "ata%u: EH invoked for delayed qc %p, sleeping 5secs\n",
+ ap->id, qc);
+ msleep(5000);
+ printk(KERN_ERR "ata%u: woke up, proceeding with EH\n", ap->id);
+ }
+
spin_lock_irqsave(&ap->host_set->lock, flags);
qc = ata_qc_from_tag(ap, ap->active_tag);
assert(!(ap->flags & ATA_FLAG_IN_EH));
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 09/13] libata: fix handling of race between timeout and completion
2006-02-01 15:36 ` Tejun Heo
@ 2006-02-09 6:21 ` Jeff Garzik
0 siblings, 0 replies; 33+ messages in thread
From: Jeff Garzik @ 2006-02-09 6:21 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide, albertcc
Tejun Heo wrote:
> SCSI EH never invokes ->eh_strategy_handler without any failed
> scsicmd. The only case where qc <-> scmd mismatch occurs is the
> following case.
>
> 1. SCSI issues command (timer activated)
> 2. libata interprets and issues
> 3. time passes
> 4. SCSI timeout occurs, scmd queued for EH
> 5. irq occurs, completes qc. ata_qc_complete() calls qc->complete_fn
> which ends up calling scsi_done(). scsi_done() sees that SCSI
> timer has already expired, so it returns without doing anything
> expecting EH to clean up.
> 6. SCSI EH kicks in, libata ->eh_strategy_handler (ata_scsi_error)
> invoked.
> 7. in libata EH, we have no active qc but scmds to recover.
OK, I agree with your analysis of the problem above.
However... [see next email, commenting on patch #3]
Jeff
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH 11/13] libata: implement ATA_FLAG_IN_EH port flag
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (8 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 09/13] libata: fix handling of race between timeout and completion Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-27 4:00 ` Jeff Garzik
2006-01-23 4:09 ` [PATCH 12/13] libata: create pio/atapi task queueing wrappers Tejun Heo
` (2 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
ATA_FLAG_IN_EH flag is set on entry to EH and cleared on completion.
This patch just sets and clears the flag. Following patches will
build normal qc execution / EH synchronization aroung this flag.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-scsi.c | 6 ++++++
include/linux/libata.h | 2 ++
2 files changed, 8 insertions(+), 0 deletions(-)
e4869b2aeea09c839ee8b291fcc286479d57c21d
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index accb63a..cf54536 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -738,6 +738,8 @@ int ata_scsi_error(struct Scsi_Host *hos
DPRINTK("ENTER\n");
spin_lock_irqsave(&ap->host_set->lock, flags);
+ assert(!(ap->flags & ATA_FLAG_IN_EH));
+ ap->flags |= ATA_FLAG_IN_EH;
qc = ata_qc_from_tag(ap, ap->active_tag);
spin_unlock_irqrestore(&ap->host_set->lock, flags);
@@ -778,6 +780,10 @@ int ata_scsi_error(struct Scsi_Host *hos
scsi_eh_flush_done_q(&ap->eh_done_q);
+ spin_lock_irqsave(&ap->host_set->lock, flags);
+ ap->flags &= ~ATA_FLAG_IN_EH;
+ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
DPRINTK("EXIT\n");
return 0;
}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 576788d..063221d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -162,6 +162,8 @@ enum {
ATA_FLAG_PIO_LBA48 = (1 << 13), /* Host DMA engine is LBA28 only */
ATA_FLAG_IRQ_MASK = (1 << 14), /* Mask IRQ in PIO xfers */
+ ATA_FLAG_IN_EH = (1 << 15), /* EH in progress */
+
ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
ATA_QCFLAG_SINGLE = (1 << 4), /* no s/g, just a single buffer */
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 12/13] libata: create pio/atapi task queueing wrappers
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (9 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 11/13] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-27 4:02 ` Jeff Garzik
2006-01-23 4:09 ` [PATCH 10/13] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
2006-01-23 4:09 ` [PATCH 13/13] libata: EH / pio tasks synchronization Tejun Heo
12 siblings, 1 reply; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
Wrap pio/atapi task queueing in correspondingly named functions. This
change doesn't change anything. It's preparation for follow-up
pio-task/eh sync patch.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-core.c | 30 ++++++++++++++++++++++++------
1 files changed, 24 insertions(+), 6 deletions(-)
3ebd6e9ce709b8596495a06eeccc4d44ed7117eb
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index b2f51bb..10e07db 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1072,6 +1072,24 @@ static unsigned int ata_pio_modes(const
timing API will get this right anyway */
}
+static inline void
+ata_queue_packet_task(struct ata_port *ap)
+{
+ queue_work(ata_wq, &ap->packet_task);
+}
+
+static inline void
+ata_queue_pio_task(struct ata_port *ap)
+{
+ queue_work(ata_wq, &ap->pio_task);
+}
+
+static inline void
+ata_queue_delayed_pio_task(struct ata_port *ap, unsigned long delay)
+{
+ queue_delayed_work(ata_wq, &ap->pio_task, delay);
+}
+
void ata_qc_complete_internal(struct ata_queued_cmd *qc)
{
struct completion *waiting = qc->private_data;
@@ -3414,7 +3432,7 @@ fsm_start:
}
if (timeout)
- queue_delayed_work(ata_wq, &ap->pio_task, timeout);
+ ata_queue_delayed_pio_task(ap, timeout);
else if (!qc_completed)
goto fsm_start;
}
@@ -3727,26 +3745,26 @@ unsigned int ata_qc_issue_prot(struct at
ata_qc_set_polling(qc);
ata_tf_to_host(ap, &qc->tf);
ap->hsm_task_state = HSM_ST;
- queue_work(ata_wq, &ap->pio_task);
+ ata_queue_pio_task(ap);
break;
case ATA_PROT_ATAPI:
ata_qc_set_polling(qc);
ata_tf_to_host(ap, &qc->tf);
- queue_work(ata_wq, &ap->packet_task);
+ ata_queue_packet_task(ap);
break;
case ATA_PROT_ATAPI_NODATA:
ap->flags |= ATA_FLAG_NOINTR;
ata_tf_to_host(ap, &qc->tf);
- queue_work(ata_wq, &ap->packet_task);
+ ata_queue_packet_task(ap);
break;
case ATA_PROT_ATAPI_DMA:
ap->flags |= ATA_FLAG_NOINTR;
ap->ops->tf_load(ap, &qc->tf); /* load tf registers */
ap->ops->bmdma_setup(qc); /* set up bmdma */
- queue_work(ata_wq, &ap->packet_task);
+ ata_queue_packet_task(ap);
break;
default:
@@ -4177,7 +4195,7 @@ static void atapi_packet_task(void *_dat
/* PIO commands are handled by polling */
ap->hsm_task_state = HSM_ST;
- queue_work(ata_wq, &ap->pio_task);
+ ata_queue_pio_task(ap);
}
return;
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 10/13] libata: kill NULL qc handling from ->eng_timeout callbacks
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (10 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 12/13] libata: create pio/atapi task queueing wrappers Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
2006-01-23 4:09 ` [PATCH 13/13] libata: EH / pio tasks synchronization Tejun Heo
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
->eng_timeout is not invoked with NULL qc anymore. Kill NULL qc
handling from all ->eng_timeout callbacks.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/ahci.c | 12 +++---------
drivers/scsi/libata-core.c | 12 +-----------
drivers/scsi/sata_mv.c | 9 ++-------
drivers/scsi/sata_promise.c | 9 +--------
drivers/scsi/sata_sil24.c | 5 -----
drivers/scsi/sata_sx4.c | 9 +--------
6 files changed, 8 insertions(+), 48 deletions(-)
7fe3cc73fb7f92c06c71e1798edefc2be6a6723c
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index 5a6b230..604c4d1 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -666,19 +666,13 @@ static void ahci_eng_timeout(struct ata_
spin_lock_irqsave(&host_set->lock, flags);
+ ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
qc = ata_qc_from_tag(ap, ap->active_tag);
- if (!qc) {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- } else {
- ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
- qc->err_mask |= AC_ERR_TIMEOUT;
- }
+ qc->err_mask |= AC_ERR_TIMEOUT;
spin_unlock_irqrestore(&host_set->lock, flags);
- if (qc)
- ata_eh_qc_complete(qc);
+ ata_eh_qc_complete(qc);
}
static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 93ae2dc..b2f51bb 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3503,20 +3503,10 @@ static void ata_qc_timeout(struct ata_qu
void ata_eng_timeout(struct ata_port *ap)
{
- struct ata_queued_cmd *qc;
-
DPRINTK("ENTER\n");
- qc = ata_qc_from_tag(ap, ap->active_tag);
- if (qc)
- ata_qc_timeout(qc);
- else {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- goto out;
- }
+ ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
-out:
DPRINTK("EXIT\n");
}
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index d9a554c..9b56f9b 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -1853,13 +1853,8 @@ static void mv_eng_timeout(struct ata_po
mv_err_intr(ap);
mv_stop_and_reset(ap);
- if (!qc) {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- } else {
- qc->err_mask |= AC_ERR_TIMEOUT;
- ata_eh_qc_complete(qc);
- }
+ qc->err_mask |= AC_ERR_TIMEOUT;
+ ata_eh_qc_complete(qc);
}
/**
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index 77ef646..9fc9cbf 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -394,11 +394,6 @@ static void pdc_eng_timeout(struct ata_p
spin_lock_irqsave(&host_set->lock, flags);
qc = ata_qc_from_tag(ap, ap->active_tag);
- if (!qc) {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- goto out;
- }
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
@@ -418,10 +413,8 @@ static void pdc_eng_timeout(struct ata_p
break;
}
-out:
spin_unlock_irqrestore(&host_set->lock, flags);
- if (qc)
- ata_eh_qc_complete(qc);
+ ata_eh_qc_complete(qc);
DPRINTK("EXIT\n");
}
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index 7222fc7..1160fda 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -638,11 +638,6 @@ static void sil24_eng_timeout(struct ata
struct ata_queued_cmd *qc;
qc = ata_qc_from_tag(ap, ap->active_tag);
- if (!qc) {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- return;
- }
printk(KERN_ERR "ata%u: command timeout\n", ap->id);
qc->err_mask |= AC_ERR_TIMEOUT;
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index 9f992fb..b4ffa3f 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -866,11 +866,6 @@ static void pdc_eng_timeout(struct ata_p
spin_lock_irqsave(&host_set->lock, flags);
qc = ata_qc_from_tag(ap, ap->active_tag);
- if (!qc) {
- printk(KERN_ERR "ata%u: BUG: timeout without command\n",
- ap->id);
- goto out;
- }
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
@@ -889,10 +884,8 @@ static void pdc_eng_timeout(struct ata_p
break;
}
-out:
spin_unlock_irqrestore(&host_set->lock, flags);
- if (qc)
- ata_eh_qc_complete(qc);
+ ata_eh_qc_complete(qc);
DPRINTK("EXIT\n");
}
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 13/13] libata: EH / pio tasks synchronization
2006-01-23 4:09 [PATCHSET] libata: various fixes related to EH, take #2 Tejun Heo
` (11 preceding siblings ...)
2006-01-23 4:09 ` [PATCH 10/13] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
@ 2006-01-23 4:09 ` Tejun Heo
12 siblings, 0 replies; 33+ messages in thread
From: Tejun Heo @ 2006-01-23 4:09 UTC (permalink / raw)
To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo
This patch makes sure that pio tasks are flushed before proceeding
with EH.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
drivers/scsi/libata-core.c | 56 ++++++++++++++++++++++++++++++++++++++++++--
include/linux/libata.h | 3 ++
2 files changed, 55 insertions(+), 4 deletions(-)
118d94ccf284d59d967ff40f52d2d6148e0fcfc6
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 10e07db..a6be973 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1075,19 +1075,66 @@ static unsigned int ata_pio_modes(const
static inline void
ata_queue_packet_task(struct ata_port *ap)
{
- queue_work(ata_wq, &ap->packet_task);
+ if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+ queue_work(ata_wq, &ap->packet_task);
}
static inline void
ata_queue_pio_task(struct ata_port *ap)
{
- queue_work(ata_wq, &ap->pio_task);
+ if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+ queue_work(ata_wq, &ap->pio_task);
}
static inline void
ata_queue_delayed_pio_task(struct ata_port *ap, unsigned long delay)
{
- queue_delayed_work(ata_wq, &ap->pio_task, delay);
+ if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+ queue_delayed_work(ata_wq, &ap->pio_task, delay);
+}
+
+/**
+ * ata_flush_pio_tasks - Flush pio_task and packet_task
+ * @ap: the target ata_port
+ *
+ * After this function completes, pio_task and packet_task are
+ * guranteed not to be running or scheduled.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ */
+
+static void ata_flush_pio_tasks(struct ata_port *ap)
+{
+ int tmp = 0;
+ unsigned long flags;
+
+ DPRINTK("ENTER\n");
+
+ spin_lock_irqsave(&ap->host_set->lock, flags);
+ ap->flags |= ATA_FLAG_FLUSH_PIO_TASK;
+ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+ DPRINTK("flush #1\n");
+ flush_workqueue(ata_wq);
+
+ /*
+ * At this point, if a task is running, it's guaranteed to see
+ * the FLUSH flag; thus, it will never queue pio tasks again.
+ * Cancel and flush.
+ */
+ tmp |= cancel_delayed_work(&ap->pio_task);
+ tmp |= cancel_delayed_work(&ap->packet_task);
+ if (!tmp) {
+ DPRINTK("flush #2\n");
+ flush_workqueue(ata_wq);
+ }
+
+ spin_lock_irqsave(&ap->host_set->lock, flags);
+ ap->flags &= ~ATA_FLAG_FLUSH_PIO_TASK;
+ spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+ DPRINTK("EXIT\n");
}
void ata_qc_complete_internal(struct ata_queued_cmd *qc)
@@ -3465,6 +3512,9 @@ static void ata_qc_timeout(struct ata_qu
DPRINTK("ENTER\n");
+ ata_flush_pio_tasks(ap);
+ ap->hsm_task_state = HSM_ST_IDLE;
+
spin_lock_irqsave(&host_set->lock, flags);
switch (qc->tf.protocol) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 063221d..7837ba3 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -162,7 +162,8 @@ enum {
ATA_FLAG_PIO_LBA48 = (1 << 13), /* Host DMA engine is LBA28 only */
ATA_FLAG_IRQ_MASK = (1 << 14), /* Mask IRQ in PIO xfers */
- ATA_FLAG_IN_EH = (1 << 15), /* EH in progress */
+ ATA_FLAG_FLUSH_PIO_TASK = (1 << 15), /* Flush PIO task */
+ ATA_FLAG_IN_EH = (1 << 16), /* EH in progress */
ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
--
1.0.8
^ permalink raw reply related [flat|nested] 33+ messages in thread