linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] libata: improve ata_down_xfermask_limit()
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
  2007-02-02  7:09 ` [PATCH 2/5] libata: improve probe failure handling Tejun Heo
@ 2007-02-02  7:09 ` Tejun Heo
  2007-02-02  7:09 ` [PATCH 4/5] libata: kill ATA_DNXFER_ANY Tejun Heo
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

Make ata_down_xfermask_limit() accept @sel instead of @force_pio0.
@sel selects how the xfermask limit will be adjusted.  The following
selectors are defined.

* ATA_DNXFER_PIO	: only speed down PIO
* ATA_DNXFER_DMA	: only speed down DMA, don't cause transfer mode change
* ATA_DNXFER_40C	: apply 40c cable limit
* ATA_DNXFER_FORCE_PIO	: force PIO
* ATA_DNXFER_FORCE_PIO0	: force PIO0 (same as original with @force_pio0 == 1)
* ATA_DNXFER_ANY	: same as original with @force_pio0 == 0

Currently, only ANY and FORCE_PIO0 are used to maintain the original
behavior.  Other selectors will be used later to improve EH speed down
sequence.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/libata-core.c |  105 ++++++++++++++++++++++++++++++++++-----------
 drivers/ata/libata-eh.c   |    9 +++-
 drivers/ata/libata.h      |   12 +++++-
 3 files changed, 96 insertions(+), 30 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index a441c75..417394b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1747,6 +1747,7 @@ int ata_bus_probe(struct ata_port *ap)
 	int tries[ATA_MAX_DEVICES];
 	int i, rc, down_xfermask;
 	struct ata_device *dev;
+	int dnxfer_sel;
 
 	ata_port_probe(ap);
 
@@ -1828,13 +1829,15 @@ int ata_bus_probe(struct ata_port *ap)
 		/* fall through */
 	default:
 		tries[dev->devno]--;
-		if (down_xfermask &&
-		    ata_down_xfermask_limit(dev, tries[dev->devno] == 1))
+		dnxfer_sel = ATA_DNXFER_ANY;
+		if (tries[dev->devno] == 1)
+			dnxfer_sel = ATA_DNXFER_FORCE_PIO0;
+		if (down_xfermask && ata_down_xfermask_limit(dev, dnxfer_sel))
 			tries[dev->devno] = 0;
 	}
 
 	if (!tries[dev->devno]) {
-		ata_down_xfermask_limit(dev, 1);
+		ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0);
 		ata_dev_disable(dev);
 	}
 
@@ -2267,7 +2270,7 @@ int ata_timing_compute(struct ata_device *adev, unsigned short speed,
 /**
  *	ata_down_xfermask_limit - adjust dev xfer masks downward
  *	@dev: Device to adjust xfer masks
- *	@force_pio0: Force PIO0
+ *	@sel: ATA_DNXFER_* selector
  *
  *	Adjust xfer masks of @dev downward.  Note that this function
  *	does not apply the change.  Invoking ata_set_mode() afterwards
@@ -2279,37 +2282,87 @@ int ata_timing_compute(struct ata_device *adev, unsigned short speed,
  *	RETURNS:
  *	0 on success, negative errno on failure
  */
-int ata_down_xfermask_limit(struct ata_device *dev, int force_pio0)
+int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel)
 {
-	unsigned long xfer_mask;
-	int highbit;
+	char buf[32];
+	unsigned int orig_mask, xfer_mask;
+	unsigned int pio_mask, mwdma_mask, udma_mask;
+	int quiet, highbit;
 
-	xfer_mask = ata_pack_xfermask(dev->pio_mask, dev->mwdma_mask,
-				      dev->udma_mask);
+	quiet = !!(sel & ATA_DNXFER_QUIET);
+	sel &= ~ATA_DNXFER_QUIET;
 
-	if (!xfer_mask)
-		goto fail;
-	/* don't gear down to MWDMA from UDMA, go directly to PIO */
-	if (xfer_mask & ATA_MASK_UDMA)
-		xfer_mask &= ~ATA_MASK_MWDMA;
+	xfer_mask = orig_mask = ata_pack_xfermask(dev->pio_mask,
+						  dev->mwdma_mask,
+						  dev->udma_mask);
+	ata_unpack_xfermask(xfer_mask, &pio_mask, &mwdma_mask, &udma_mask);
 
-	highbit = fls(xfer_mask) - 1;
-	xfer_mask &= ~(1 << highbit);
-	if (force_pio0)
-		xfer_mask &= 1 << ATA_SHIFT_PIO;
-	if (!xfer_mask)
-		goto fail;
+	switch (sel) {
+	case ATA_DNXFER_PIO:
+		highbit = fls(pio_mask) - 1;
+		pio_mask &= ~(1 << highbit);
+		break;
+
+	case ATA_DNXFER_DMA:
+		if (udma_mask) {
+			highbit = fls(udma_mask) - 1;
+			udma_mask &= ~(1 << highbit);
+			if (!udma_mask)
+				return -ENOENT;
+		} else if (mwdma_mask) {
+			highbit = fls(mwdma_mask) - 1;
+			mwdma_mask &= ~(1 << highbit);
+			if (!mwdma_mask)
+				return -ENOENT;
+		}
+		break;
+
+	case ATA_DNXFER_40C:
+		udma_mask &= ATA_UDMA_MASK_40C;
+		break;
+
+	case ATA_DNXFER_FORCE_PIO0:
+		pio_mask &= 1;
+	case ATA_DNXFER_FORCE_PIO:
+		mwdma_mask = 0;
+		udma_mask = 0;
+		break;
+
+	case ATA_DNXFER_ANY:
+		/* don't gear down to MWDMA from UDMA, go directly to PIO */
+		if (xfer_mask & ATA_MASK_UDMA)
+			xfer_mask &= ~ATA_MASK_MWDMA;
+
+		highbit = fls(xfer_mask) - 1;
+		xfer_mask &= ~(1 << highbit);
+		break;
+
+	default:
+		BUG();
+	}
+
+	xfer_mask &= ata_pack_xfermask(pio_mask, mwdma_mask, udma_mask);
+
+	if (!(xfer_mask & ATA_MASK_PIO) || xfer_mask == orig_mask)
+		return -ENOENT;
+
+	if (!quiet) {
+		if (xfer_mask & (ATA_MASK_MWDMA | ATA_MASK_UDMA))
+			snprintf(buf, sizeof(buf), "%s:%s",
+				 ata_mode_string(xfer_mask),
+				 ata_mode_string(xfer_mask & ATA_MASK_PIO));
+		else
+			snprintf(buf, sizeof(buf), "%s",
+				 ata_mode_string(xfer_mask));
+
+		ata_dev_printk(dev, KERN_WARNING,
+			       "limiting speed to %s\n", buf);
+	}
 
 	ata_unpack_xfermask(xfer_mask, &dev->pio_mask, &dev->mwdma_mask,
 			    &dev->udma_mask);
 
-	ata_dev_printk(dev, KERN_WARNING, "limiting speed to %s\n",
-		       ata_mode_string(xfer_mask));
-
 	return 0;
-
- fail:
-	return -EINVAL;
 }
 
 static int ata_dev_set_mode(struct ata_device *dev)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 52c85af..7b61562 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1276,7 +1276,7 @@ static int ata_eh_speed_down(struct ata_device *dev, int is_io,
 		return ATA_EH_HARDRESET;
 
 	/* lower transfer mode */
-	if (ata_down_xfermask_limit(dev, 0) == 0)
+	if (ata_down_xfermask_limit(dev, ATA_DNXFER_ANY) == 0)
 		return ATA_EH_SOFTRESET;
 
 	ata_dev_printk(dev, KERN_ERR,
@@ -1965,6 +1965,7 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	struct ata_eh_context *ehc = &ap->eh_context;
 	struct ata_device *dev;
 	int down_xfermask, i, rc;
+	int dnxfer_sel;
 
 	DPRINTK("ENTER\n");
 
@@ -2064,8 +2065,10 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		sata_down_spd_limit(ap);
 	default:
 		ehc->tries[dev->devno]--;
-		if (down_xfermask &&
-		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
+		dnxfer_sel = ATA_DNXFER_ANY;
+		if (ehc->tries[dev->devno] == 1)
+			dnxfer_sel = ATA_DNXFER_FORCE_PIO0;
+		if (down_xfermask && ata_down_xfermask_limit(dev, dnxfer_sel))
 			ehc->tries[dev->devno] = 0;
 	}
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 06ccf23..36e08d2 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -41,6 +41,16 @@ struct ata_scsi_args {
 enum {
 	/* flags for ata_dev_read_id() */
 	ATA_READID_POSTRESET	= (1 << 0), /* reading ID after reset */
+
+	/* selector for ata_down_xfermask_limit() */
+	ATA_DNXFER_PIO		= 0,	/* speed down PIO */
+	ATA_DNXFER_DMA		= 1,	/* speed down DMA */
+	ATA_DNXFER_40C		= 2,	/* apply 40c cable limit */
+	ATA_DNXFER_FORCE_PIO	= 3,	/* force PIO */
+	ATA_DNXFER_FORCE_PIO0	= 4,	/* force PIO0 */
+	ATA_DNXFER_ANY		= 5,	/* speed down any */
+
+	ATA_DNXFER_QUIET	= (1 << 31),
 };
 
 extern struct workqueue_struct *ata_aux_wq;
@@ -68,7 +78,7 @@ extern int ata_dev_revalidate(struct ata_device *dev, unsigned int flags);
 extern int ata_dev_configure(struct ata_device *dev);
 extern int sata_down_spd_limit(struct ata_port *ap);
 extern int sata_set_spd_needed(struct ata_port *ap);
-extern int ata_down_xfermask_limit(struct ata_device *dev, int force_pio0);
+extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
 extern int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev);
 extern void ata_sg_clean(struct ata_queued_cmd *qc);
 extern void ata_qc_free(struct ata_queued_cmd *qc);
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/5] libata: improve probe failure handling
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
@ 2007-02-02  7:09 ` Tejun Heo
  2007-02-02  7:09 ` [PATCH 1/5] libata: improve ata_down_xfermask_limit() Tejun Heo
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

* Move forcing device to PIO0 on device disable into
  ata_dev_disable().  This makes both old and new EHs act the same
  way.

* Speed down only PIO mode on probe failure.  All commands used during
  probing are PIO commands.  There's no point in speeding down DMA.

* Retry at least once after -ENODEV.  Some devices report garbled
  IDENTIFY data after certain events.  This shouldn't cause device
  detach and re-attach.

* Rearrange EH failure path for simplicity.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/libata-core.c |   39 +++++++++++++++++++--------------------
 drivers/ata/libata-eh.c   |   35 ++++++++++++++++++-----------------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 417394b..cbd4bf1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -596,6 +596,8 @@ void ata_dev_disable(struct ata_device *dev)
 {
 	if (ata_dev_enabled(dev) && ata_msg_drv(dev->ap)) {
 		ata_dev_printk(dev, KERN_WARNING, "disabled\n");
+		ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 |
+					     ATA_DNXFER_QUIET);
 		dev->class++;
 	}
 }
@@ -1745,9 +1747,8 @@ int ata_bus_probe(struct ata_port *ap)
 {
 	unsigned int classes[ATA_MAX_DEVICES];
 	int tries[ATA_MAX_DEVICES];
-	int i, rc, down_xfermask;
+	int i, rc;
 	struct ata_device *dev;
-	int dnxfer_sel;
 
 	ata_port_probe(ap);
 
@@ -1755,8 +1756,6 @@ int ata_bus_probe(struct ata_port *ap)
 		tries[i] = ATA_PROBE_MAX_TRIES;
 
  retry:
-	down_xfermask = 0;
-
 	/* reset and determine device classes */
 	ap->ops->phy_reset(ap);
 
@@ -1804,10 +1803,8 @@ int ata_bus_probe(struct ata_port *ap)
 
 	/* configure transfer mode */
 	rc = ata_set_mode(ap, &dev);
-	if (rc) {
-		down_xfermask = 1;
+	if (rc)
 		goto fail;
-	}
 
 	for (i = 0; i < ATA_MAX_DEVICES; i++)
 		if (ata_dev_enabled(&ap->device[i]))
@@ -1819,27 +1816,29 @@ int ata_bus_probe(struct ata_port *ap)
 	return -ENODEV;
 
  fail:
+	tries[dev->devno]--;
+
 	switch (rc) {
 	case -EINVAL:
-	case -ENODEV:
+		/* eeek, something went very wrong, give up */
 		tries[dev->devno] = 0;
 		break;
+
+	case -ENODEV:
+		/* give it just one more chance */
+		tries[dev->devno] = min(tries[dev->devno], 1);
 	case -EIO:
-		sata_down_spd_limit(ap);
-		/* fall through */
-	default:
-		tries[dev->devno]--;
-		dnxfer_sel = ATA_DNXFER_ANY;
-		if (tries[dev->devno] == 1)
-			dnxfer_sel = ATA_DNXFER_FORCE_PIO0;
-		if (down_xfermask && ata_down_xfermask_limit(dev, dnxfer_sel))
-			tries[dev->devno] = 0;
+		if (tries[dev->devno] == 1) {
+			/* This is the last chance, better to slow
+			 * down than lose it.
+			 */
+			sata_down_spd_limit(ap);
+			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
+		}
 	}
 
-	if (!tries[dev->devno]) {
-		ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0);
+	if (!tries[dev->devno])
 		ata_dev_disable(dev);
-	}
 
 	goto retry;
 }
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 7b61562..1abfdba 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1964,8 +1964,7 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 {
 	struct ata_eh_context *ehc = &ap->eh_context;
 	struct ata_device *dev;
-	int down_xfermask, i, rc;
-	int dnxfer_sel;
+	int i, rc;
 
 	DPRINTK("ENTER\n");
 
@@ -1994,7 +1993,6 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	}
 
  retry:
-	down_xfermask = 0;
 	rc = 0;
 
 	/* if UNLOADING, finish immediately */
@@ -2039,10 +2037,8 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	/* configure transfer mode if necessary */
 	if (ehc->i.flags & ATA_EHI_SETMODE) {
 		rc = ata_set_mode(ap, &dev);
-		if (rc) {
-			down_xfermask = 1;
+		if (rc)
 			goto dev_fail;
-		}
 		ehc->i.flags &= ~ATA_EHI_SETMODE;
 	}
 
@@ -2054,22 +2050,27 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	goto out;
 
  dev_fail:
+	ehc->tries[dev->devno]--;
+
 	switch (rc) {
-	case -ENODEV:
-		/* device missing, schedule probing */
-		ehc->i.probe_mask |= (1 << dev->devno);
 	case -EINVAL:
+		/* eeek, something went very wrong, give up */
 		ehc->tries[dev->devno] = 0;
 		break;
+
+	case -ENODEV:
+		/* device missing or wrong IDENTIFY data, schedule probing */
+		ehc->i.probe_mask |= (1 << dev->devno);
+		/* give it just one more chance */
+		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
 	case -EIO:
-		sata_down_spd_limit(ap);
-	default:
-		ehc->tries[dev->devno]--;
-		dnxfer_sel = ATA_DNXFER_ANY;
-		if (ehc->tries[dev->devno] == 1)
-			dnxfer_sel = ATA_DNXFER_FORCE_PIO0;
-		if (down_xfermask && ata_down_xfermask_limit(dev, dnxfer_sel))
-			ehc->tries[dev->devno] = 0;
+		if (ehc->tries[dev->devno] == 1) {
+			/* This is the last chance, better to slow
+			 * down than lose it.
+			 */
+			sata_down_spd_limit(ap);
+			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
+		}
 	}
 
 	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCHSET] ata_piix: improve combined mode handling
@ 2007-02-02  7:09 Tejun Heo
  2007-02-02  7:09 ` [PATCH 2/5] libata: improve probe failure handling Tejun Heo
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide, htejun

Subject: [PATCHSET] put some intelligence into speed down sequence

Hello,

The current EH speed down code is more of a feature demonstration and
goes through rdiculously many meaningless steps when condition is met.
This patchset tries to put some intelligence into speed down sequence.
The goal is to achieve reasonable number of speed down steps
reasonably spaced from one another and consider NCQ, cable type and
the current protocol when determining speed down steps, while not
bloating the code too much with nitty gritty details.

Roughly, the rules are...

1. If NCQ and protocol/timeout/unknown dev errors occur, turn off NCQ

2. If excessive transfer errors occur, speed down within the current
   transfer mode (UDMA/MWDMA/PIO).  If UDMA, it's first adjusted down
   a step, if error conditions persist, 40c limit is applied.  Speed
   down is done only twice.

3. If PATA && used up all DMA speed down steps && a LOT of
   transmission/unknown errors occur, switch to PIO.  So, we never
   automatically step down to PIO on SATA.  This is intended.  Some
   SATA hdd even seems to have problem with PIO data transfer
   commands.

The last patch makes ahci report HSM violation error on spurious
completion of NCQ commands, thus causing NCQ off after several such
incidents.  These drives should be blacklisted for DMA eventually.

This patchset is against...

  upstream (eb0e63cca36a3389f0ccab4584f6d479b983fad5)
+ [1] pata_platform-fix-devres-conversion
+ [2] libata-convert-to-iomap

Ric, I guess this resolves the to-do item from you which has been
sitting in my mailbox for way too long.  What do you think about the
rules?

Thanks.

--
tejun



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/5] libata: put some intelligence into EH speed down sequence
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
                   ` (3 preceding siblings ...)
  2007-02-02  7:09 ` [PATCH 5/5] ahci: consider SDB FIS containing spurious NCQ completions HSM violation Tejun Heo
@ 2007-02-02  7:09 ` Tejun Heo
  2007-02-02  7:10 ` Subject should be [PATCHSET] put some intelligence into " Tejun Heo
  5 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence.  The
rules are..

* If there have been more than three timeout, HSM violation or
  unclassified DEV errors for known supported commands during last 10
  mins, NCQ is turned off.

* If there have been more than three timeout or HSM violation for known
  supported command, transfer mode is slowed down.  If DMA is active,
  it is first slowered by one grade (e.g. UDMA133->100).  If that
  doesn't help, it's slowered to 40c limit (UDMA33).  If PIO is
  active, it's slowered by one grade first.  If that doesn't help,
  PIO0 is forced.  Note that this rule does not change transfer mode.
  DMA is never degraded into PIO by this rule.

* If there have been more than ten ATA bus, timeout, HSM violation or
  unclassified device errors for known supported commands && speeding
  down DMA mode didn't help, the device is forced into PIO mode.  Note
  that this rule is considered only for PATA devices and is pretty
  difficult to trigger.

One error can only trigger one rule at a time.  After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated.  This makes
sense because now speed down is done in bigger stride.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/libata-eh.c |  184 +++++++++++++++++++++++++++++++---------------
 include/linux/libata.h  |    1 +
 2 files changed, 125 insertions(+), 60 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 1abfdba..3173862 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -44,6 +44,12 @@
 
 #include "libata.h"
 
+enum {
+	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
+	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
+	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
+};
+
 static void __ata_port_freeze(struct ata_port *ap);
 static void ata_eh_finish(struct ata_port *ap);
 static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -65,12 +71,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io,
 	ent->timestamp = get_jiffies_64();
 }
 
-static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
+static void ata_ering_clear(struct ata_ering *ering)
 {
-	struct ata_ering_entry *ent = &ering->ring[ering->cursor];
-	if (!ent->err_mask)
-		return NULL;
-	return ent;
+	memset(ering, 0, sizeof(*ering));
 }
 
 static int ata_ering_map(struct ata_ering *ering,
@@ -1159,87 +1162,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
 	return action;
 }
 
-static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
+static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
 {
-	if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
+	if (err_mask & AC_ERR_ATA_BUS)
 		return 1;
 
-	if (ent->is_io) {
-		if (ent->err_mask & AC_ERR_HSM)
-			return 1;
-		if ((ent->err_mask &
-		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+	if (err_mask & AC_ERR_TIMEOUT)
+		return 2;
+
+	if (is_io) {
+		if (err_mask & AC_ERR_HSM)
 			return 2;
+		if ((err_mask &
+		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+			return 3;
 	}
 
 	return 0;
 }
 
-struct speed_down_needed_arg {
+struct speed_down_verdict_arg {
 	u64 since;
-	int nr_errors[3];
+	int nr_errors[4];
 };
 
-static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
+static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
 {
-	struct speed_down_needed_arg *arg = void_arg;
+	struct speed_down_verdict_arg *arg = void_arg;
+	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);
 
 	if (ent->timestamp < arg->since)
 		return -1;
 
-	arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
+	arg->nr_errors[cat]++;
 	return 0;
 }
 
 /**
- *	ata_eh_speed_down_needed - Determine wheter speed down is necessary
+ *	ata_eh_speed_down_verdict - Determine speed down verdict
  *	@dev: Device of interest
  *
  *	This function examines error ring of @dev and determines
- *	whether speed down is necessary.  Speed down is necessary if
- *	there have been more than 3 of Cat-1 errors or 10 of Cat-2
- *	errors during last 15 minutes.
+ *	whether NCQ needs to be turned off, transfer speed should be
+ *	stepped down, or falling back to PIO is necessary.
+ *
+ *	Cat-1 is ATA_BUS error for any command.
  *
- *	Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
- *	violation for known supported commands.
+ *	Cat-2 is TIMEOUT for any command or HSM violation for known
+ *	supported commands.
  *
- *	Cat-2 errors are unclassified DEV error for known supported
+ *	Cat-3 is is unclassified DEV error for known supported
  *	command.
  *
+ *	NCQ needs to be turned off if there have been more than 3
+ *	Cat-2 + Cat-3 errors during last 10 minutes.
+ *
+ *	Speed down is necessary if there have been more than 3 Cat-1 +
+ *	Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
+ *
+ *	Falling back to PIO mode is necessary if there have been more
+ *	than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
+ *
  *	LOCKING:
  *	Inherited from caller.
  *
  *	RETURNS:
- *	1 if speed down is necessary, 0 otherwise
+ *	OR of ATA_EH_SPDN_* flags.
  */
-static int ata_eh_speed_down_needed(struct ata_device *dev)
+static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
 {
-	const u64 interval = 15LLU * 60 * HZ;
-	static const int err_limits[3] = { -1, 3, 10 };
-	struct speed_down_needed_arg arg;
-	struct ata_ering_entry *ent;
-	int err_cat;
-	u64 j64;
+	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
+	u64 j64 = get_jiffies_64();
+	struct speed_down_verdict_arg arg;
+	unsigned int verdict = 0;
 
-	ent = ata_ering_top(&dev->ering);
-	if (!ent)
-		return 0;
+	/* scan past 10 mins of error history */
+	memset(&arg, 0, sizeof(arg));
+	arg.since = j64 - min(j64, j10mins);
+	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
 
-	err_cat = ata_eh_categorize_ering_entry(ent);
-	if (err_cat == 0)
-		return 0;
+	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
+		verdict |= ATA_EH_SPDN_NCQ_OFF;
+	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
+		verdict |= ATA_EH_SPDN_SPEED_DOWN;
 
+	/* scan past 3 mins of error history */
 	memset(&arg, 0, sizeof(arg));
+	arg.since = j64 - min(j64, j5mins);
+	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
 
-	j64 = get_jiffies_64();
-	if (j64 >= interval)
-		arg.since = j64 - interval;
-	else
-		arg.since = 0;
-
-	ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
+	if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
+		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
 
-	return arg.nr_errors[err_cat] > err_limits[err_cat];
+	return verdict;
 }
 
 /**
@@ -1257,31 +1272,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev)
  *	Kernel thread context (may sleep).
  *
  *	RETURNS:
- *	0 on success, -errno otherwise
+ *	Determined recovery action.
  */
-static int ata_eh_speed_down(struct ata_device *dev, int is_io,
-			     unsigned int err_mask)
+static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
+				      unsigned int err_mask)
 {
-	if (!err_mask)
+	unsigned int verdict;
+	unsigned int action = 0;
+
+	/* don't bother if Cat-0 error */
+	if (ata_eh_categorize_error(is_io, err_mask) == 0)
 		return 0;
 
 	/* record error and determine whether speed down is necessary */
 	ata_ering_record(&dev->ering, is_io, err_mask);
+	verdict = ata_eh_speed_down_verdict(dev);
 
-	if (!ata_eh_speed_down_needed(dev))
-		return 0;
+	/* turn off NCQ? */
+	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
+	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
+			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
+		dev->flags |= ATA_DFLAG_NCQ_OFF;
+		ata_dev_printk(dev, KERN_WARNING,
+			       "NCQ disabled due to excessive errors\n");
+		goto done;
+	}
+
+	/* speed down? */
+	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
+		/* speed down SATA link speed if possible */
+		if (sata_down_spd_limit(dev->ap) == 0) {
+			action |= ATA_EH_HARDRESET;
+			goto done;
+		}
 
-	/* speed down SATA link speed if possible */
-	if (sata_down_spd_limit(dev->ap) == 0)
-		return ATA_EH_HARDRESET;
+		/* lower transfer mode */
+		if (dev->spdn_cnt < 2) {
+			static const int dma_dnxfer_sel[] =
+				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
+			static const int pio_dnxfer_sel[] =
+				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
+			int sel;
 
-	/* lower transfer mode */
-	if (ata_down_xfermask_limit(dev, ATA_DNXFER_ANY) == 0)
-		return ATA_EH_SOFTRESET;
+			if (dev->xfer_shift != ATA_SHIFT_PIO)
+				sel = dma_dnxfer_sel[dev->spdn_cnt];
+			else
+				sel = pio_dnxfer_sel[dev->spdn_cnt];
+
+			dev->spdn_cnt++;
+
+			if (ata_down_xfermask_limit(dev, sel) == 0) {
+				action |= ATA_EH_SOFTRESET;
+				goto done;
+			}
+		}
+	}
+
+	/* Fall back to PIO?  Slowing down to PIO is meaningless for
+	 * SATA.  Consider it only for PATA.
+	 */
+	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
+	    (dev->ap->cbl != ATA_CBL_SATA) &&
+	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
+		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
+			dev->spdn_cnt = 0;
+			action |= ATA_EH_SOFTRESET;
+			goto done;
+		}
+	}
 
-	ata_dev_printk(dev, KERN_ERR,
-		       "speed down requested but no transfer mode left\n");
 	return 0;
+ done:
+	/* device has been slowed down, blow error history */
+	ata_ering_clear(&dev->ering);
+	return action;
 }
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4c3ed59..1b3963f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -495,6 +495,7 @@ struct ata_device {
 
 	/* error history */
 	struct ata_ering	ering;
+	int			spdn_cnt;
 	unsigned int		horkage;	/* List of broken features */
 };
 
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/5] ahci: consider SDB FIS containing spurious NCQ completions HSM violation
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
                   ` (2 preceding siblings ...)
  2007-02-02  7:09 ` [PATCH 4/5] libata: kill ATA_DNXFER_ANY Tejun Heo
@ 2007-02-02  7:09 ` Tejun Heo
  2007-02-02  7:09 ` [PATCH 3/5] libata: put some intelligence into EH speed down sequence Tejun Heo
  2007-02-02  7:10 ` Subject should be [PATCHSET] put some intelligence into " Tejun Heo
  5 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

SDB FIS containing spurious NCQ completions is a clear protocol
violation.  Currently, only some Maxtors with early firmware revisions
are showing this problem.  Those firmwares have other NCQ related
problems including buggy NCQ error reporting and occasional lock up
after NCQ errors.

Consider spurious NCQ completions HSM violation and freeze the port
after it.  EH will turn off NCQ after this happens several times.
Eventually drives which show this behavior should be blacklisted for
NCQ.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/ahci.c |   26 +++++++++++++-------------
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8f1d753..1d8d377 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -199,7 +199,6 @@ struct ahci_port_priv {
 	void			*rx_fis;
 	dma_addr_t		rx_fis_dma;
 	/* for NCQ spurious interrupt analysis */
-	int			ncq_saw_spurious_sdb_cnt;
 	unsigned int		ncq_saw_d2h:1;
 	unsigned int		ncq_saw_dmas:1;
 };
@@ -1157,23 +1156,24 @@ static void ahci_host_intr(struct ata_port *ap)
 		known_irq = 1;
 	}
 
-	if (status & PORT_IRQ_SDB_FIS &&
-		   pp->ncq_saw_spurious_sdb_cnt < 10) {
+	if (status & PORT_IRQ_SDB_FIS) {
 		/* SDB FIS containing spurious completions might be
-		 * dangerous, we need to know more about them.  Print
-		 * more of it.
+		 * dangerous, whine and fail commands with HSM
+		 * violation.  EH will turn off NCQ after several such
+		 * failures.
 		 */
 		const u32 *f = pp->rx_fis + RX_FIS_SDB;
 
-		ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
-				"issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
-				readl(port_mmio + PORT_CMD_ISSUE),
-				readl(port_mmio + PORT_SCR_ACT),
-				le32_to_cpu(f[0]), le32_to_cpu(f[1]),
-				pp->ncq_saw_spurious_sdb_cnt < 10 ?
-				"" : ", shutting up");
+		ata_ehi_push_desc(ehi, "spurious completion during NCQ "
+				  "issue=0x%x SAct=0x%x FIS=%08x:%08x",
+				  readl(port_mmio + PORT_CMD_ISSUE),
+				  readl(port_mmio + PORT_SCR_ACT),
+				  le32_to_cpu(f[0]), le32_to_cpu(f[1]));
+
+		ehi->err_mask |= AC_ERR_HSM;
+		ehi->action |= ATA_EH_SOFTRESET;
+		ata_port_freeze(ap);
 
-		pp->ncq_saw_spurious_sdb_cnt++;
 		known_irq = 1;
 	}
 
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/5] libata: kill ATA_DNXFER_ANY
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
  2007-02-02  7:09 ` [PATCH 2/5] libata: improve probe failure handling Tejun Heo
  2007-02-02  7:09 ` [PATCH 1/5] libata: improve ata_down_xfermask_limit() Tejun Heo
@ 2007-02-02  7:09 ` Tejun Heo
  2007-02-02  7:09 ` [PATCH 5/5] ahci: consider SDB FIS containing spurious NCQ completions HSM violation Tejun Heo
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:09 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

ATA_DNXFER_ANY isn't used anymore.  Kill it.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/libata-core.c |    9 ---------
 drivers/ata/libata.h      |    1 -
 2 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index cbd4bf1..651ab4b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2327,15 +2327,6 @@ int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel)
 		udma_mask = 0;
 		break;
 
-	case ATA_DNXFER_ANY:
-		/* don't gear down to MWDMA from UDMA, go directly to PIO */
-		if (xfer_mask & ATA_MASK_UDMA)
-			xfer_mask &= ~ATA_MASK_MWDMA;
-
-		highbit = fls(xfer_mask) - 1;
-		xfer_mask &= ~(1 << highbit);
-		break;
-
 	default:
 		BUG();
 	}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 36e08d2..6fc6260 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -48,7 +48,6 @@ enum {
 	ATA_DNXFER_40C		= 2,	/* apply 40c cable limit */
 	ATA_DNXFER_FORCE_PIO	= 3,	/* force PIO */
 	ATA_DNXFER_FORCE_PIO0	= 4,	/* force PIO0 */
-	ATA_DNXFER_ANY		= 5,	/* speed down any */
 
 	ATA_DNXFER_QUIET	= (1 << 31),
 };
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Subject should be [PATCHSET]  put some intelligence into speed down sequence
  2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
                   ` (4 preceding siblings ...)
  2007-02-02  7:09 ` [PATCH 3/5] libata: put some intelligence into EH speed down sequence Tejun Heo
@ 2007-02-02  7:10 ` Tejun Heo
  2007-02-02  7:12   ` Please ignore this thread. I'll resend Tejun Heo
  5 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:10 UTC (permalink / raw)
  To: Tejun Heo; +Cc: jeff, alan, ric, edmudama, linux-ide

Eek, sorry.

-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Please ignore this thread.  I'll resend.
  2007-02-02  7:10 ` Subject should be [PATCHSET] put some intelligence into " Tejun Heo
@ 2007-02-02  7:12   ` Tejun Heo
  0 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:12 UTC (permalink / raw)
  To: jeff; +Cc: alan, ric, edmudama, linux-ide

Tejun Heo wrote:
> Eek, sorry.
> 


-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 4/5] libata: kill ATA_DNXFER_ANY
  2007-02-02  7:22 asdf Tejun Heo
@ 2007-02-02  7:22 ` Tejun Heo
  0 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2007-02-02  7:22 UTC (permalink / raw)
  To: jeff, alan, ric, edmudama, linux-ide; +Cc: Tejun Heo

ATA_DNXFER_ANY isn't used anymore.  Kill it.

Signed-off-by: Tejun Heo <htejun@gmail.com>
---
 drivers/ata/libata-core.c |    9 ---------
 drivers/ata/libata.h      |    1 -
 2 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index cbd4bf1..651ab4b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2327,15 +2327,6 @@ int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel)
 		udma_mask = 0;
 		break;
 
-	case ATA_DNXFER_ANY:
-		/* don't gear down to MWDMA from UDMA, go directly to PIO */
-		if (xfer_mask & ATA_MASK_UDMA)
-			xfer_mask &= ~ATA_MASK_MWDMA;
-
-		highbit = fls(xfer_mask) - 1;
-		xfer_mask &= ~(1 << highbit);
-		break;
-
 	default:
 		BUG();
 	}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 36e08d2..6fc6260 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -48,7 +48,6 @@ enum {
 	ATA_DNXFER_40C		= 2,	/* apply 40c cable limit */
 	ATA_DNXFER_FORCE_PIO	= 3,	/* force PIO */
 	ATA_DNXFER_FORCE_PIO0	= 4,	/* force PIO0 */
-	ATA_DNXFER_ANY		= 5,	/* speed down any */
 
 	ATA_DNXFER_QUIET	= (1 << 31),
 };
-- 
1.4.4.4



^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2007-02-02  7:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-02-02  7:09 [PATCHSET] ata_piix: improve combined mode handling Tejun Heo
2007-02-02  7:09 ` [PATCH 2/5] libata: improve probe failure handling Tejun Heo
2007-02-02  7:09 ` [PATCH 1/5] libata: improve ata_down_xfermask_limit() Tejun Heo
2007-02-02  7:09 ` [PATCH 4/5] libata: kill ATA_DNXFER_ANY Tejun Heo
2007-02-02  7:09 ` [PATCH 5/5] ahci: consider SDB FIS containing spurious NCQ completions HSM violation Tejun Heo
2007-02-02  7:09 ` [PATCH 3/5] libata: put some intelligence into EH speed down sequence Tejun Heo
2007-02-02  7:10 ` Subject should be [PATCHSET] put some intelligence into " Tejun Heo
2007-02-02  7:12   ` Please ignore this thread. I'll resend Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2007-02-02  7:22 asdf Tejun Heo
2007-02-02  7:22 ` [PATCH 4/5] libata: kill ATA_DNXFER_ANY Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).