linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 03/13] libata: add per-dev ata_ering
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (3 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 02/13] libata: implement ata_ering Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 07/13] libata: implement ata_eh_report() Tejun Heo
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

EH is gonna record errors per-dev.  Define per-dev ata_ering.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 +
 include/linux/libata.h     |    4 ++++
 2 files changed, 5 insertions(+), 0 deletions(-)

ca065c0f8bb2d545dc3abe83101c4d714f72dee9
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index fc9ae17..e6dec59 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -4762,6 +4762,7 @@ static void ata_host_init(struct ata_por
 		dev->pio_mask = UINT_MAX;
 		dev->mwdma_mask = UINT_MAX;
 		dev->udma_mask = UINT_MAX;
+		ata_ering_init(&dev->ering, ATA_DEV_ERING_SIZE);
 	}
 
 #ifdef ATA_IRQ_TRAP
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e4480f2..7eeb023 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -136,6 +136,8 @@ enum {
 	ATA_DEV_ATAPI_UNSUP	= 4,	/* ATAPI device (unsupported) */
 	ATA_DEV_NONE		= 5,	/* no device */
 
+	ATA_DEV_ERING_SIZE	= 32,	/* record 32 recent errors */
+
 	/* struct ata_port flags */
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
@@ -412,6 +414,8 @@ struct ata_device {
 	u16			cylinders;	/* Number of cylinders */
 	u16			heads;		/* Number of heads */
 	u16			sectors;	/* Number of sectors per track */
+
+	DEFINE_ATA_ERING	(ering, ATA_DEV_ERING_SIZE);
 };
 
 struct ata_port {
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 04/13] libata: implement EH utility functions
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
  2006-04-03  3:44 ` [PATCH 01/13] libata: add constants and flags to be used by EH Tejun Heo
  2006-04-03  3:44 ` [PATCH 05/13] libata: implement ata_eh_determine_qc() Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 02/13] libata: implement ata_ering Tejun Heo
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement two utility functions ata_err_string() and
atapi_eh_request_sense().  They will be used by EH.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-eh.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 92 insertions(+), 0 deletions(-)

59bc14437422f5faeddf7c6a3a745738b0f51202
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index b5825b7..c4ef2dd 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -434,3 +434,95 @@ void ata_eh_qc_retry(struct ata_queued_c
 		scmd->retries--;
 	__ata_eh_qc_complete(qc);
 }
+
+/**
+ *	ata_err_string - convert err_mask to descriptive string
+ *	@err_mask: error mask to convert to string
+ *
+ *	Convert @err_mask to descriptive string.  Errors are
+ *	prioritized according to severity and only the most severe
+ *	error is reported.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	Descriptive string for @err_mask
+ */
+static const char * ata_err_string(unsigned int err_mask)
+{
+	if (err_mask & AC_ERR_HOST_BUS)
+		return "host bus error";
+	if (err_mask & AC_ERR_ATA_BUS)
+		return "ATA bus error";
+	if (err_mask & AC_ERR_TIMEOUT)
+		return "timeout";
+	if (err_mask & AC_ERR_HSM)
+		return "host state machine violation";
+	if (err_mask & AC_ERR_SYSTEM)
+		return "host internal error";
+	if (err_mask & AC_ERR_MEDIA)
+		return "media error";
+	if (err_mask & AC_ERR_INVALID)
+		return "invalid argument error";
+	if (err_mask & AC_ERR_DEV)
+		return "unclassified device error";
+	return "unknown error";
+}
+
+/**
+ *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
+ *	@ap: port associated with device @dev
+ *	@dev: device to perform REQUEST_SENSE to
+ *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
+ *
+ *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
+ *	SENSE.  This function is EH helper.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	0 on success, AC_ERR_* mask on failure
+ */
+static unsigned int atapi_eh_request_sense(struct ata_port *ap,
+					   struct ata_device *dev,
+					   unsigned char *sense_buf)
+{
+	struct ata_taskfile tf;
+	u8 cdb[ATAPI_CDB_LEN];
+
+	DPRINTK("ATAPI request sense\n");
+
+	ata_tf_init(ap, &tf, dev->devno);
+
+	/* FIXME: is this needed? */
+	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
+
+	/* XXX: why tf_read here? */
+	ap->ops->tf_read(ap, &tf);
+
+	/* fill these in, for the case where they are -not- overwritten */
+	sense_buf[0] = 0x70;
+	sense_buf[2] = tf.feature >> 4;
+
+	memset(cdb, 0, ATAPI_CDB_LEN);
+	cdb[0] = REQUEST_SENSE;
+	cdb[4] = SCSI_SENSE_BUFFERSIZE;
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+
+	/* is it pointless to prefer PIO for "safety reasons"? */
+	if (ap->flags & ATA_FLAG_PIO_DMA) {
+		tf.protocol = ATA_PROT_ATAPI_DMA;
+		tf.feature |= ATAPI_PKT_DMA;
+	} else {
+		tf.protocol = ATA_PROT_ATAPI;
+		tf.lbam = (8 * 1024) & 0xff;
+		tf.lbah = (8 * 1024) >> 8;
+	}
+
+	return ata_exec_internal(ap, dev, &tf, cdb, DMA_FROM_DEVICE,
+				 sense_buf, SCSI_SENSE_BUFFERSIZE);
+}
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 05/13] libata: implement ata_eh_determine_qc()
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
  2006-04-03  3:44 ` [PATCH 01/13] libata: add constants and flags to be used by EH Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 04/13] libata: implement EH utility functions Tejun Heo
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH helper ata_eh_determine_qc().  This function determines
which is the offending qc and loads TF registers for the qc.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 +
 drivers/scsi/libata-eh.c   |   24 ++++++++++++++++++++++++
 include/linux/libata.h     |    2 ++
 3 files changed, 27 insertions(+), 0 deletions(-)

3864377b3160cd8523dd6d752b6dd79cc3464967
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index e6dec59..06bf7f4 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5279,3 +5279,4 @@ EXPORT_SYMBOL_GPL(ata_eng_timeout);
 EXPORT_SYMBOL_GPL(ata_eh_schedule_port);
 EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
 EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
+EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index c4ef2dd..7781f67 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -526,3 +526,27 @@ static unsigned int atapi_eh_request_sen
 	return ata_exec_internal(ap, dev, &tf, cdb, DMA_FROM_DEVICE,
 				 sense_buf, SCSI_SENSE_BUFFERSIZE);
 }
+
+/**
+ *	ata_eh_determine_qc - Determine which qc caused error
+ *	@ap: port which failed
+ *	@tf: resulting taskfile registers of the failed command
+ *
+ *	Determine which qc caused failure and read associated tf
+ *	registers.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	Pointer to the failed qc.
+ */
+struct ata_queued_cmd * ata_eh_determine_qc(struct ata_port *ap,
+					    struct ata_taskfile *tf)
+{
+	memset(tf, 0, sizeof(*tf));
+	ap->ops->tf_read(ap, tf);
+
+	return __ata_qc_from_tag(ap, ap->active_tag);
+}
+
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7eeb023..789dd75 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -684,6 +684,8 @@ extern void ata_eng_timeout(struct ata_p
 extern void ata_eh_schedule_port(struct ata_port *ap, unsigned int flags);
 extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
 extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
+extern struct ata_queued_cmd * ata_eh_determine_qc(struct ata_port *ap,
+						   struct ata_taskfile *tf);
 
 
 static inline int
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 01/13] libata: add constants and flags to be used by EH
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 05/13] libata: implement ata_eh_determine_qc() Tejun Heo
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Add constants and flags to be used by EH.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 include/linux/ata.h    |   11 +++++++++++
 include/linux/libata.h |    5 +++++
 2 files changed, 16 insertions(+), 0 deletions(-)

4aea2da11e6a1d39b417af71e881601cbb19387f
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 312a2c0..283138f 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -97,6 +97,9 @@ enum {
 	ATA_DRQ			= (1 << 3),	/* data request i/o */
 	ATA_ERR			= (1 << 0),	/* have an error */
 	ATA_SRST		= (1 << 2),	/* software reset */
+	ATA_ICRC		= (1 << 7),	/* interface CRC error */
+	ATA_UNC			= (1 << 6),	/* uncorrectable media error */
+	ATA_IDNF		= (1 << 4),	/* ID not found */
 	ATA_ABORTED		= (1 << 2),	/* command aborted */
 
 	/* ATA command block registers */
@@ -192,6 +195,14 @@ enum {
 	SCR_ACTIVE		= 3,
 	SCR_NOTIFICATION	= 4,
 
+	/* SError bits */
+	SERR_DATA_RECOVERED	= (1 << 0), /* recovered data error */
+	SERR_COMM_RECOVERED	= (1 << 1), /* recovered comm failure */
+	SERR_DATA		= (1 << 8), /* unrecovered data error */
+	SERR_PERSISTENT		= (1 << 9), /* persistent data/comm error */
+	SERR_PROTOCOL		= (1 << 10), /* protocol violation */
+	SERR_INTERNAL		= (1 << 11), /* host internal error */
+
 	/* struct ata_taskfile flags */
 	ATA_TFLAG_LBA48		= (1 << 0), /* enable 48-bit LBA and "HOB" */
 	ATA_TFLAG_ISADDR	= (1 << 1), /* enable r/w to nsect/lba regs */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c65cda9..19d3ecc 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -223,6 +223,11 @@ enum {
 	ATA_PORT_PRIMARY	= (1 << 0),
 	ATA_PORT_SECONDARY	= (1 << 1),
 
+	/* reset / recovery action types */
+	ATA_PORT_REVALIDATE	= (1 << 0),
+	ATA_PORT_SOFTRESET	= (1 << 1),
+	ATA_PORT_HARDRESET	= (1 << 2),
+
 	/* flags for ata_eh_shduled_port */
 	ATA_EH_ABORT		= (1 << 0), /* abort all active commands */
 	ATA_EH_FREEZE		= (1 << 1), /* freeze port (implies ABORT) */
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCHSET] new EH implementation
@ 2006-04-03  3:44 Tejun Heo
  2006-04-03  3:44 ` [PATCH 01/13] libata: add constants and flags to be used by EH Tejun Heo
                   ` (12 more replies)
  0 siblings, 13 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide, htejun

Hello, all.

This is the second half of the new EH - the actual EH implementation.
This patchset contains thirteen patches and is against:

  upstream [1]
  + scsi_eh_schedule patchset, take 2 [2][3]
  + ahci softreset presence detection patch [4]
  + eh-framework patchset [5]

#01-04	Prep for EH.  Add constants, implement ering and utility functions
#05-09	Implement EH helpers.  These are the backbones of new EH.
#10	Implement stock BMDMA EH using EH helpers.
#11-13	Convert ata_piix, sata_sil and ahci to new EH

Thanks.

--
tejun

[1] 6d5f9732a16a74d75f8cdba5b00557662e83f466
[2] http://marc.theaimsgroup.com/?l=linux-scsi&m=114399387517874&w=2
[3] http://marc.theaimsgroup.com/?l=linux-ide&m=114399407718154&w=2
[4] http://marc.theaimsgroup.com/?l=linux-ide&m=114399712126232&w=2
[5] http://article.gmane.org/gmane.linux.ide/9311



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 02/13] libata: implement ata_ering
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (2 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 04/13] libata: implement EH utility functions Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 03/13] libata: add per-dev ata_ering Tejun Heo
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

ata_ering is a ring buffer which records libata errors - whether a
command was for a normal IO request, err_mask and timestamp.  This will
be used by EH to determine recovery actions.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-eh.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/libata.h    |    1 +
 include/linux/libata.h   |   16 ++++++++++++++
 3 files changed, 68 insertions(+), 0 deletions(-)

b92a795c5b84f3840388da95e2d4675d976c9e66
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index dfbb8c4..b5825b7 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -44,6 +44,57 @@
 
 #include "libata.h"
 
+void ata_ering_init(struct ata_ering *ering, int size)
+{
+	memset(ering, 0, sizeof(*ering) + sizeof(ering->ring[0]) * size);
+	ering->size = size;
+}
+
+static void ata_ering_record(struct ata_ering *ering, int is_io,
+			     unsigned int err_mask)
+{
+	struct ata_ering_entry *ent;
+
+	WARN_ON(!err_mask);
+
+	ering->cursor++;
+	ering->cursor %= ering->size;
+
+	ent = &ering->ring[ering->cursor];
+	ent->is_io = is_io;
+	ent->err_mask = err_mask;
+	ent->timestamp = get_jiffies_64();
+}
+
+static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
+{
+	struct ata_ering_entry *ent = &ering->ring[ering->cursor];
+	if (!ent->err_mask)
+		return NULL;
+	return ent;
+}
+
+static int ata_ering_map(struct ata_ering *ering,
+			 int (*map_fn)(struct ata_ering_entry *, void *),
+			 void *arg)
+{
+	int idx, rc = 0;
+	struct ata_ering_entry *ent;
+
+	idx = ering->cursor;
+	do {
+		ent = &ering->ring[idx];
+		if (!ent->err_mask)
+			break;
+		rc = map_fn(ent, arg);
+		if (rc)
+			break;
+		idx = (idx - 1 + ering->size) % ering->size;
+	} while (idx != ering->cursor);
+
+	return rc;
+}
+
 /**
  *	ata_scsi_timed_out - SCSI layer time out callback
  *	@cmd: timed out SCSI command
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
index 826010c..97eeacb 100644
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -105,6 +105,7 @@ extern void ata_scsi_rbuf_fill(struct at
                                            u8 *rbuf, unsigned int buflen));
 
 /* libata-eh.c */
+extern void ata_ering_init(struct ata_ering *ering, int size);
 extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern void ata_eh_schedule_qc(struct ata_queued_cmd *qc);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 19d3ecc..e4480f2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -371,6 +371,22 @@ struct ata_host_stats {
 	unsigned long		rw_reqbuf;
 };
 
+struct ata_ering_entry {
+	int			is_io;
+	unsigned int		err_mask;
+	u64			timestamp;
+};
+
+struct ata_ering {
+	int			cursor;
+	int			size;
+	struct ata_ering_entry	ring[];
+};
+
+#define DEFINE_ATA_ERING(name, size)	\
+	struct ata_ering	name;	\
+	struct ata_ering_entry	name_entries[size];
+
 struct ata_device {
 	u64			n_sectors;	/* size of device, if ATA */
 	unsigned long		flags;		/* ATA_DFLAG_xxx */
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 11/13] ata_piix: convert to new EH
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (7 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 13/13] ahci: convert to new EH Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 08/13] libata: implement ata_eh_revive() Tejun Heo
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

ata_piix can use stock BMDMA EH routines.  Convert to new EH.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/ata_piix.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

b1f5e082114555527205661d02f4f3876a1ae650
diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 24e71b5..13cc588 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -244,7 +244,9 @@ static const struct ata_port_operations 
 	.qc_prep		= ata_qc_prep,
 	.qc_issue		= ata_qc_issue_prot,
 
-	.eng_timeout		= ata_eng_timeout,
+	.freeze			= ata_bmdma_freeze,
+	.error_handler		= ata_bmdma_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
 
 	.irq_handler		= ata_interrupt,
 	.irq_clear		= ata_bmdma_irq_clear,
@@ -272,7 +274,9 @@ static const struct ata_port_operations 
 	.qc_prep		= ata_qc_prep,
 	.qc_issue		= ata_qc_issue_prot,
 
-	.eng_timeout		= ata_eng_timeout,
+	.freeze			= ata_bmdma_freeze,
+	.error_handler		= ata_bmdma_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
 
 	.irq_handler		= ata_interrupt,
 	.irq_clear		= ata_bmdma_irq_clear,
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 08/13] libata: implement ata_eh_revive()
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (8 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 11/13] ata_piix: " Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  7:42   ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 12/13] sata_sil: convert to new EH Tejun Heo
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH helper function ata_eh_revive().  This function executes
what ata_eh_autopsy() and other parts of EH determined necessary to
resurrect the port.  As in ata_bus_probe(), each device is given a fixed
number (ATA_EH_MAX_TRIES) of chances.  If a device uses up all its
chances and still fails to recover, it gets disabled.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 
 drivers/scsi/libata-eh.c   |  149 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    4 +
 3 files changed, 154 insertions(+), 0 deletions(-)

cbae2411028b7f48070809527cb1d7475635faad
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index dbf6815..62dabb1 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5282,3 +5282,4 @@ EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
 EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
 EXPORT_SYMBOL_GPL(ata_eh_autopsy);
 EXPORT_SYMBOL_GPL(ata_eh_report);
+EXPORT_SYMBOL_GPL(ata_eh_revive);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 7b8b2d8..3fc3f0f 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -880,3 +880,152 @@ void ata_eh_report(struct ata_port *ap, 
 	       tf->command, tf->feature, serror, action,
 	       desc_head, desc, desc_tail);
 }
+
+/**
+ *	ata_eh_revive - revive host port after error
+ *	@ap: host port to revive
+ *	@action: action to perform to revive @ap
+ *	@softreset: softreset method (can be NULL)
+ *	@hardreset: hardreset method (can be NULL)
+ *	@postreset: postreset method (can be NULL)
+ *
+ *	Perform action specified by @action to revive host port @ap
+ *	after error.
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+int ata_eh_revive(struct ata_port *ap, unsigned int action,
+		  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+		  ata_postreset_fn_t postreset)
+{
+	unsigned int classes[ATA_MAX_DEVICES];
+	int tries[ATA_MAX_DEVICES], reset_tries;
+	int scr_valid = ap->flags & ATA_FLAG_SATA && ap->ops->scr_read;
+	struct ata_device *dev;
+	ata_reset_fn_t reset;
+	int i, down_xfermask, rc = 0;
+
+	if (!action)
+		goto out;
+
+	reset_tries = ATA_EH_MAX_TRIES;
+	for (i = 0; i < ATA_MAX_DEVICES; i++)
+		tries[i] = ATA_EH_MAX_TRIES;
+
+	/* revalidate */
+	if (action == ATA_PORT_REVALIDATE) {
+		for (i = 0; i < ATA_MAX_DEVICES; i++) {
+			struct ata_device *dev = &ap->device[i];
+			if (!ata_dev_enabled(dev) ||
+			    !(dev->flags & ATA_DFLAG_FAILED))
+				continue;
+			if (ata_dev_revalidate(ap, dev, 0))
+				break;
+		}
+		if (i == ATA_MAX_DEVICES) {
+			rc = 0;
+			goto out;
+		}
+
+		action |= ATA_PORT_SOFTRESET;
+	}
+	action &= ~ATA_PORT_REVALIDATE;
+
+	/* Give devices time to get ready before trying the first
+	 * reset.  Without this, devices tend to fail the first reset
+	 * under certain circumstances and cause much longer delay.
+	 */
+	if (scr_valid && sata_dev_present(ap)) {
+		unsigned long timeout = jiffies + 5 * HZ;
+		while (time_before(jiffies, timeout) &&
+		       ata_chk_status(ap) & ATA_BUSY)
+			ssleep(1);
+	}
+
+	if (softreset && (!hardreset || (!ata_set_sata_spd_needed(ap) &&
+					 action == ATA_PORT_SOFTRESET)))
+		reset = softreset;
+	else
+		reset = hardreset;
+
+ retry:
+	down_xfermask = 0;
+
+	/* reset.  postreset is responsible for thawing the port. */
+	printk("ata%u: %s resetting channel for error handling\n",
+	       ap->id, reset == softreset ? "soft" : "hard");
+
+	rc = ata_do_reset(ap, reset, postreset, 1, classes);
+	if (rc)
+		goto fail_reset;
+
+	/* revalidate and reconfigure devices */
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+
+		if (!tries[i])
+			ata_dev_disable(ap, dev);
+
+		if (!ata_dev_enabled(dev))
+			continue;
+
+		rc = ata_dev_revalidate(ap, dev, 1);
+		if (rc)
+			goto fail;
+	}
+
+	/* configure transfer mode */
+	rc = ata_set_mode(ap, &dev);
+	if (rc) {
+		down_xfermask = 1;
+		goto fail;
+	}
+
+	goto out;
+
+ fail_reset:
+	if (!--reset_tries)
+		goto out;
+	if (reset == hardreset)
+		ata_down_sata_spd_limit(ap);
+	if (hardreset)
+		reset = hardreset;
+
+	printk(KERN_WARNING "ata%u: reset failed, will retry in 5 secs\n",
+	       ap->id);
+	ssleep(5);
+	goto retry;
+
+ fail:
+	switch (rc) {
+	case -EINVAL:
+	case -ENODEV:
+		tries[dev->devno] = 0;
+		break;
+	case -EIO:
+		ata_down_sata_spd_limit(ap);
+	default:
+		tries[dev->devno]--;
+		if (down_xfermask &&
+		    ata_down_xfermask_limit(ap, dev, tries[dev->devno] == 1))
+			tries[dev->devno] = 0;
+	}
+
+	if (hardreset)
+		reset = hardreset;
+
+	printk(KERN_WARNING "ata%u: some devices seem to be offline, will "
+	       "retry in 5 secs\n", ap->id);
+	ssleep(5);
+	goto retry;
+
+ out:
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+		dev->flags &= ~ATA_DFLAG_FAILED;
+		if (rc)
+			ata_dev_disable(ap, dev);
+	}
+
+	return rc;
+}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 437c5ad..ed28a1d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -236,6 +236,7 @@ enum {
 
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
+	ATA_EH_MAX_TRIES	= 3,
 };
 
 enum hsm_task_states {
@@ -692,6 +693,9 @@ extern unsigned int ata_eh_autopsy(struc
 extern void ata_eh_report(struct ata_port *ap, struct ata_queued_cmd *qc,
 			  const struct ata_taskfile *tf, u32 serror,
 			  unsigned int action, const char *desc);
+extern int ata_eh_revive(struct ata_port *ap, unsigned int action,
+			 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+			 ata_postreset_fn_t postreset);
 
 
 static inline int
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 07/13] libata: implement ata_eh_report()
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (4 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 03/13] libata: add per-dev ata_ering Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 06/13] libata: implement ata_eh_autopsy() Tejun Heo
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH helper function ata_eh_report().  This function reports
to the user which command caused what kind of error on which device.
Detailed EH status is also printed to help track down the problem.
LLDDs may supply an LLDD-specific message to ata_eh_report(), which
formats it and prints it together with the other error information.

Using this function standardizes error messages over different drivers
helping both the users and developers.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 +
 drivers/scsi/libata-eh.c   |   50 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    3 +++
 3 files changed, 54 insertions(+), 0 deletions(-)

2480239bcc13b4fa02bdfc5b70140cf4d46a50ff
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index ea5335b..dbf6815 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5281,3 +5281,4 @@ EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
 EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
 EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
 EXPORT_SYMBOL_GPL(ata_eh_autopsy);
+EXPORT_SYMBOL_GPL(ata_eh_report);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 5224fe4..7b8b2d8 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -830,3 +830,53 @@ unsigned int ata_eh_autopsy(struct ata_p
 	return action;
 }
 
+/**
+ *	ata_eh_report - report error handling to user
+ *	@ap: host port EH is going on
+ *	@qc: failed qc (could be NULL)
+ *	@tf: current taskfile register values
+ *	@serror: SError register value
+ *	@action: determined recovery action
+ *	@desc: extra description
+ *
+ *	Report EH to user.
+ *
+ *	LOCKING:
+ *	None.
+ */
+void ata_eh_report(struct ata_port *ap, struct ata_queued_cmd *qc,
+		   const struct ata_taskfile *tf, u32 serror,
+		   unsigned int action, const char *desc)
+{
+	const char *desc_head, *desc_tail;
+
+	if (desc && desc[0] != '\0') {
+		desc_head = "      (";
+		desc_tail = ")\n";
+	} else {
+		desc_head = "";
+		desc = "";
+		desc_tail = "";
+	}
+
+	if (!qc) {
+		printk(KERN_ERR
+		       "ata%u: stat 0x%x err 0x%x SError 0x%x action 0x%x\n"
+		       "%s%s%s",
+		       ap->id, tf->command, tf->feature, serror,
+		       action, desc_head, desc, desc_tail);
+		return;
+	}
+
+	if (!qc->err_mask)
+		return;
+
+	printk(KERN_ERR
+	       "ata%u: dev %u command 0x%x tag %u failed with %s\n"
+	       "      Emask 0x%x stat 0x%x err 0x%x SErr 0x%x action 0x%x\n"
+	       "%s%s%s",
+	       ap->id, qc->dev->devno, qc->tf.command, qc->tag,
+	       ata_err_string(qc->err_mask), qc->err_mask,
+	       tf->command, tf->feature, serror, action,
+	       desc_head, desc, desc_tail);
+}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index f65bde5..437c5ad 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -689,6 +689,9 @@ extern struct ata_queued_cmd * ata_eh_de
 extern unsigned int ata_eh_autopsy(struct ata_port *ap,
 				   struct ata_queued_cmd *qc,
 				   const struct ata_taskfile *tf, u32 serror);
+extern void ata_eh_report(struct ata_port *ap, struct ata_queued_cmd *qc,
+			  const struct ata_taskfile *tf, u32 serror,
+			  unsigned int action, const char *desc);
 
 
 static inline int
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 12/13] sata_sil: convert to new EH
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (9 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 08/13] libata: implement ata_eh_revive() Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 09/13] libata: implement ata_eh_finish_qcs() Tejun Heo
  2006-04-03  3:44 ` [PATCH 10/13] libata: implement EH methods for BMDMA controllers Tejun Heo
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Convert sata_sil to new EH.  As these controllers have hardware
interrupt mask and are known to have screaming interrupts issues, use
hardware IRQ masking for freezing.  sil_freeze() masks interrupts for
the port and sil_postreset() thaws it.  As ports are automatically
frozen before probing reset, there is no need to initialize interrupt
masks in sil_init_one().  Remove the related code.

Other than freezing, sata_sil uses stock BMDMA EH routines.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/sata_sil.c |   63 +++++++++++++++++++++++++++++++++++------------
 1 files changed, 47 insertions(+), 16 deletions(-)

1ed544ba5e576ec76f13776dd32e8ced0bd0f97a
diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c
index d6c7086..2bb1ce1 100644
--- a/drivers/scsi/sata_sil.c
+++ b/drivers/scsi/sata_sil.c
@@ -95,7 +95,10 @@ static int sil_init_one (struct pci_dev 
 static void sil_dev_config(struct ata_port *ap, struct ata_device *dev);
 static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg);
 static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int sil_probe_reset(struct ata_port *ap, unsigned int *classes);
 static void sil_post_set_mode (struct ata_port *ap);
+static void sil_freeze(struct ata_port *ap);
+static void sil_error_handler(struct ata_port *ap);
 
 
 static const struct pci_device_id sil_pci_tbl[] = {
@@ -167,7 +170,7 @@ static const struct ata_port_operations 
 	.check_status		= ata_check_status,
 	.exec_command		= ata_exec_command,
 	.dev_select		= ata_std_dev_select,
-	.probe_reset		= ata_std_probe_reset,
+	.probe_reset		= sil_probe_reset,
 	.post_set_mode		= sil_post_set_mode,
 	.bmdma_setup            = ata_bmdma_setup,
 	.bmdma_start            = ata_bmdma_start,
@@ -175,7 +178,9 @@ static const struct ata_port_operations 
 	.bmdma_status		= ata_bmdma_status,
 	.qc_prep		= ata_qc_prep,
 	.qc_issue		= ata_qc_issue_prot,
-	.eng_timeout		= ata_eng_timeout,
+	.freeze			= sil_freeze,
+	.error_handler		= sil_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
 	.irq_handler		= ata_interrupt,
 	.irq_clear		= ata_bmdma_irq_clear,
 	.scr_read		= sil_scr_read,
@@ -315,6 +320,45 @@ static void sil_scr_write (struct ata_po
 		writel(val, mmio);
 }
 
+static void sil_postreset(struct ata_port *ap, unsigned int *classes)
+{
+	void __iomem *mmio_base = ap->host_set->mmio_base;
+	u32 tmp;
+
+	/* reset complete, turn on IRQ */
+	tmp = readl(mmio_base + SIL_SYSCFG);
+	tmp &= ~(SIL_MASK_IDE0_INT << ap->port_no);
+	writel(tmp, mmio_base + SIL_SYSCFG);
+	readl(mmio_base + SIL_SYSCFG);	/* flush */
+
+	ata_std_postreset(ap, classes);
+}
+
+static int sil_probe_reset(struct ata_port *ap, unsigned int *classes)
+{
+	return ata_drive_probe_reset(ap, ata_std_probeinit,
+				     ata_std_softreset, sata_std_hardreset,
+				     sil_postreset, classes);
+}
+
+static void sil_freeze(struct ata_port *ap)
+{
+	void __iomem *mmio_base = ap->host_set->mmio_base;
+	u32 tmp;
+
+	/* plug IRQ */
+	tmp = readl(mmio_base + SIL_SYSCFG);
+	tmp |= SIL_MASK_IDE0_INT << ap->port_no;
+	writel(tmp, mmio_base + SIL_SYSCFG);
+	readl(mmio_base + SIL_SYSCFG);	/* flush */
+}
+
+static void sil_error_handler(struct ata_port *ap)
+{
+	ata_bmdma_drive_eh(ap, ata_std_softreset, sata_std_hardreset,
+			   sil_postreset);
+}
+
 /**
  *	sil_dev_config - Apply device/host-specific errata fixups
  *	@ap: Port containing device to be examined
@@ -385,7 +429,7 @@ static int sil_init_one (struct pci_dev 
 	int rc;
 	unsigned int i;
 	int pci_dev_busy = 0;
-	u32 tmp, irq_mask;
+	u32 tmp;
 	u8 cls;
 
 	if (!printed_version++)
@@ -479,24 +523,11 @@ static int sil_init_one (struct pci_dev 
 	}
 
 	if (ent->driver_data == sil_3114) {
-		irq_mask = SIL_MASK_4PORT;
-
 		/* flip the magic "make 4 ports work" bit */
 		tmp = readl(mmio_base + sil_port[2].bmdma);
 		if ((tmp & SIL_INTR_STEERING) == 0)
 			writel(tmp | SIL_INTR_STEERING,
 			       mmio_base + sil_port[2].bmdma);
-
-	} else {
-		irq_mask = SIL_MASK_2PORT;
-	}
-
-	/* make sure IDE0/1/2/3 interrupts are not masked */
-	tmp = readl(mmio_base + SIL_SYSCFG);
-	if (tmp & irq_mask) {
-		tmp &= ~irq_mask;
-		writel(tmp, mmio_base + SIL_SYSCFG);
-		readl(mmio_base + SIL_SYSCFG);	/* flush */
 	}
 
 	/* mask all SATA phy-related interrupts */
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 09/13] libata: implement ata_eh_finish_qcs()
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (10 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 12/13] sata_sil: convert to new EH Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 10/13] libata: implement EH methods for BMDMA controllers Tejun Heo
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH helper function ata_eh_finish_qcs().  This function is
called after all EH actions are complete and finishes all the failed
qcs.  Depending on error status, a qc may be retried or completed.
This function is also responsible for loading qc->tf with resulting TF
values.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 +
 drivers/scsi/libata-eh.c   |   45 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    2 ++
 3 files changed, 48 insertions(+), 0 deletions(-)

82204946494cd4749cd83431dd141560f33969c6
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 62dabb1..f3ab396 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5283,3 +5283,4 @@ EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
 EXPORT_SYMBOL_GPL(ata_eh_autopsy);
 EXPORT_SYMBOL_GPL(ata_eh_report);
 EXPORT_SYMBOL_GPL(ata_eh_revive);
+EXPORT_SYMBOL_GPL(ata_eh_finish_qcs);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 3fc3f0f..80d1282 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -1029,3 +1029,48 @@ int ata_eh_revive(struct ata_port *ap, u
 
 	return rc;
 }
+
+/**
+ *	ata_eh_finish_qcs - complete or retry commands
+ *	@ap: host port to finish qc's for
+ *	@qc: the failed qc (can be NULL)
+ *	@tf: taskfile register of the failed qc
+ *
+ *	Retry or complete failed qc's.
+ *
+ *	LOCKING:
+ *	None.
+ */
+void ata_eh_finish_qcs(struct ata_port *ap, struct ata_queued_cmd *qc,
+		       struct ata_taskfile *tf)
+{
+	struct ata_taskfile tmp_tf;
+
+	if (qc) {
+		/* prevent infinite retry loop */
+		if (!qc->err_mask && !(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
+			printk(KERN_WARNING "ata%u: dev %u qc has no error "
+			       "flag set after EH, forcing AC_ERR_OTHER\n",
+			       ap->id, qc->dev->devno);
+			qc->err_mask |= AC_ERR_OTHER;
+		}
+
+		/* FIXME: qc->tf will be used by completion callbacks
+		 * to generate SCSI sense data.  This is to share
+		 * sense generation code with old-EH drivers.  Once EH
+		 * migration is complete, generate sense data in this
+		 * function, considering both err_mask and tf.
+		 */
+		tmp_tf = *tf;
+		tmp_tf.flags = qc->tf.flags;
+		tmp_tf.protocol = qc->tf.protocol;
+		tmp_tf.ctl = qc->tf.ctl;
+		qc->tf = tmp_tf;
+
+		if (qc->err_mask & AC_ERR_INVALID ||
+		    qc->flags & ATA_QCFLAG_SENSE_VALID)
+			ata_eh_qc_complete(qc);
+		else
+			ata_eh_qc_retry(qc);
+	}
+}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index ed28a1d..f3f53ad 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -696,6 +696,8 @@ extern void ata_eh_report(struct ata_por
 extern int ata_eh_revive(struct ata_port *ap, unsigned int action,
 			 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
 			 ata_postreset_fn_t postreset);
+extern void ata_eh_finish_qcs(struct ata_port *ap, struct ata_queued_cmd *qc,
+			      struct ata_taskfile *tf);
 
 
 static inline int
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 10/13] libata: implement EH methods for BMDMA controllers
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (11 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 09/13] libata: implement ata_eh_finish_qcs() Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH methods for BMDMA controllers.  The following are
defined.

* ata_bmdma_freeze: freeze BMDMA controller by turning on ATA_NIEN
* ata_bmdma_drive_eh: drive BMDMA EH using given soft, hard and
		      post reset methods.
* ata_bmdma_error_handler: the stock BMDMA EH with stock reset
			   routines.
* ata_bmdma_post_internal_cmd: the stock BMDMA post_internal_cmd.
			       Makes sure BMDMA engine is stopped
			       after an internal command.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-bmdma.c |  112 +++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/libata-core.c  |    4 ++
 include/linux/libata.h      |    7 +++
 3 files changed, 123 insertions(+), 0 deletions(-)

2537bfbd1e40eeb9302062aa6513f3a300436003
diff --git a/drivers/scsi/libata-bmdma.c b/drivers/scsi/libata-bmdma.c
index 835dff0..6e48ce5 100644
--- a/drivers/scsi/libata-bmdma.c
+++ b/drivers/scsi/libata-bmdma.c
@@ -652,6 +652,118 @@ void ata_bmdma_stop(struct ata_queued_cm
 	ata_altstatus(ap);        /* dummy read */
 }
 
+/**
+ *	ata_bmdma_freeze - Freeze BMDMA controller port
+ *	@ap: port to freeze
+ *
+ *	Freeze BMDMA controller port.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+void ata_bmdma_freeze(struct ata_port *ap)
+{
+	struct ata_ioports *ioaddr = &ap->ioaddr;
+
+	ap->ctl |= ATA_NIEN;
+	ap->last_ctl = ap->ctl;
+
+	if (ap->flags & ATA_FLAG_MMIO)
+		writeb(ap->ctl, (void __iomem *)ioaddr->ctl_addr);
+	else
+		outb(ap->ctl, ioaddr->ctl_addr);
+}
+
+/**
+ *	ata_bmdma_drive_eh - Perform EH with given methods for BMDMA controller
+ *	@ap: port to handle error for
+ *
+ *	Handle error for ATA BMDMA controller.  It can handle both
+ *	PATA and SATA controllers.  Many controllers should be able to
+ *	use this EH as-is or with some added handling before and
+ *	after.
+ *
+ *	This function is intended to be used for constructing
+ *	->error_handler callback by low level drivers.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+void ata_bmdma_drive_eh(struct ata_port *ap,
+			ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+			ata_postreset_fn_t postreset)
+{
+	struct ata_host_set *host_set = ap->host_set;
+	unsigned int action = 0;
+	struct ata_queued_cmd *qc;
+	unsigned long flags;
+	struct ata_taskfile tf;
+	u32 serror;
+
+	qc = ata_eh_determine_qc(ap, &tf);
+
+	/* reset PIO HSM and stop DMA engine */
+	spin_lock_irqsave(&host_set->lock, flags);
+
+	ap->flags &= ~ATA_FLAG_NOINTR;
+	ap->hsm_task_state = HSM_ST_IDLE;
+
+	if (qc && (qc->tf.protocol == ATA_PROT_DMA ||
+		   qc->tf.protocol == ATA_PROT_ATAPI_DMA))
+		ap->ops->bmdma_stop(qc);
+
+	ata_altstatus(ap);
+	ata_chk_status(ap);
+	ap->ops->irq_clear(ap);
+
+	spin_unlock_irqrestore(&host_set->lock, flags);
+
+	/* PIO and DMA engines have been stopped, perform recovery */
+	serror = 0;
+	if (ap->cbl == ATA_CBL_SATA && ap->ops->scr_read) {
+		serror = scr_read(ap, SCR_ERROR);
+		scr_write(ap, SCR_ERROR, serror);
+	}
+
+	action |= ata_eh_autopsy(ap, qc, &tf, serror);
+	ata_eh_report(ap, qc, &tf, serror, action, NULL);
+	ata_eh_revive(ap, action, softreset, hardreset, postreset);
+	ata_eh_finish_qcs(ap, qc, &tf);
+}
+
+/**
+ *	ata_bmdma_error_handler - Stock error handler for BMDMA controller
+ *	@ap: port to handle error for
+ *
+ *	Stock error handler for BMDMA controller.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+void ata_bmdma_error_handler(struct ata_port *ap)
+{
+	ata_reset_fn_t hardreset;
+
+	hardreset = NULL;
+	if (ap->flags & ATA_FLAG_SATA && ap->ops->scr_read)
+		hardreset = sata_std_hardreset;
+
+	ata_bmdma_drive_eh(ap, ata_std_softreset, hardreset, ata_std_postreset);
+}
+
+/**
+ *	ata_bmdma_post_internal_cmd - Stock post_internal_cmd for
+ *				      BMDMA controller
+ *	@qc: internal command to clean up
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc)
+{
+	ata_bmdma_stop(qc);
+}
+
 #ifdef CONFIG_PCI
 static struct ata_probe_ent *
 ata_probe_ent_alloc(struct device *dev, const struct ata_port_info *port)
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index f3ab396..5491afb 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5227,6 +5227,10 @@ EXPORT_SYMBOL_GPL(ata_bmdma_start);
 EXPORT_SYMBOL_GPL(ata_bmdma_irq_clear);
 EXPORT_SYMBOL_GPL(ata_bmdma_status);
 EXPORT_SYMBOL_GPL(ata_bmdma_stop);
+EXPORT_SYMBOL_GPL(ata_bmdma_freeze);
+EXPORT_SYMBOL_GPL(ata_bmdma_drive_eh);
+EXPORT_SYMBOL_GPL(ata_bmdma_error_handler);
+EXPORT_SYMBOL_GPL(ata_bmdma_post_internal_cmd);
 EXPORT_SYMBOL_GPL(ata_port_probe);
 EXPORT_SYMBOL_GPL(sata_phy_reset);
 EXPORT_SYMBOL_GPL(__sata_phy_reset);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index f3f53ad..43e5392 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -622,6 +622,13 @@ extern void ata_bmdma_start (struct ata_
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
 extern u8   ata_bmdma_status(struct ata_port *ap);
 extern void ata_bmdma_irq_clear(struct ata_port *ap);
+extern void ata_bmdma_freeze(struct ata_port *ap);
+extern void ata_bmdma_drive_eh(struct ata_port *ap,
+			       ata_reset_fn_t softreset,
+			       ata_reset_fn_t hardreset,
+			       ata_postreset_fn_t postreset);
+extern void ata_bmdma_error_handler(struct ata_port *ap);
+extern void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev,
 			      struct scsi_cmnd *cmd,
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 06/13] libata: implement ata_eh_autopsy()
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (5 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 07/13] libata: implement ata_eh_report() Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 13/13] ahci: convert to new EH Tejun Heo
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Implement EH helper function ata_eh_autopsy().  This function analyzes
how the port and qc failed and determines what to do to recover from
the condition.

* Analyzes TF/SError
* Record the error and determine whether speeding down is necessary.
  If so, adjust relevant limits.
* Determine which action is required to recover - REVALIDATE,
  PORT_SOFTRESET or PORT_HARDRESET.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 
 drivers/scsi/libata-eh.c   |  280 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    3 
 3 files changed, 284 insertions(+), 0 deletions(-)

166600385563ff9043f86179422b041fb6a1c7fb
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 06bf7f4..ea5335b 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5280,3 +5280,4 @@ EXPORT_SYMBOL_GPL(ata_eh_schedule_port);
 EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
 EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
 EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
+EXPORT_SYMBOL_GPL(ata_eh_autopsy);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 7781f67..5224fe4 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -550,3 +550,283 @@ struct ata_queued_cmd * ata_eh_determine
 	return __ata_qc_from_tag(ap, ap->active_tag);
 }
 
+/**
+ *	ata_eh_analyze_tf - analyze taskfile of a failed qc
+ *	@qc: qc to analyze
+ *	@tf: Taskfile registers to analyze
+ *
+ *	Analyze taskfile of @qc and further determine cause of
+ *	failure.  This function also requests ATAPI sense data if
+ *	available.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	Determined recovery action
+ */
+static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
+				      const struct ata_taskfile *tf)
+{
+	unsigned int tmp, action = 0;
+	u8 stat = tf->command, err = tf->feature;
+
+	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
+		qc->err_mask |= AC_ERR_HSM;
+		return ATA_PORT_SOFTRESET;
+	}
+
+	if (!(qc->err_mask & AC_ERR_DEV))
+		return 0;
+
+	switch (qc->dev->class) {
+	case ATA_DEV_ATA:
+		if (err & ATA_ICRC)
+			qc->err_mask |= AC_ERR_ATA_BUS;
+		if (err & ATA_UNC)
+			qc->err_mask |= AC_ERR_MEDIA;
+		if (err & ATA_IDNF)
+			qc->err_mask |= AC_ERR_INVALID;
+		break;
+
+	case ATA_DEV_ATAPI:
+		tmp = atapi_eh_request_sense(qc->ap, qc->dev,
+					     qc->scsicmd->sense_buffer);
+		if (!tmp) {
+			/*
+			 * ATA_QCFLAG_SENSE_VALID is used to tell
+			 * atapi_qc_complete() that sense data is
+			 * already valid.
+			 *
+			 * TODO: interpret sense data and set
+			 * appropriate err_mask.
+			 */
+			qc->err_mask &= ~AC_ERR_DEV;
+			qc->flags |= ATA_QCFLAG_SENSE_VALID;
+		} else
+			qc->err_mask |= tmp;
+	}
+
+	if (qc->err_mask) {
+		action |= ATA_PORT_REVALIDATE;
+		if (qc->err_mask &
+		    (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
+			action |= ATA_PORT_SOFTRESET;
+	}
+
+	return action;
+}
+
+/**
+ *	ata_eh_analyze_serror - analyze SError of a failed qc
+ *	@serror: SError to analyze
+ *	@p_err_mask: Resulting err_mask
+ *
+ *	Analyze SError if available and further determine cause of
+ *	failure.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	Determined recovery action
+ */
+static unsigned int ata_eh_analyze_serror(u32 serror, unsigned int *p_err_mask)
+{
+	unsigned int action = 0;
+
+	/* decode SError bits into err_mask and recovery action */
+	if (serror & SERR_PERSISTENT) {
+		*p_err_mask |= AC_ERR_ATA_BUS;
+		action |= ATA_PORT_HARDRESET;
+	}
+	if (serror &
+	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
+		*p_err_mask |= AC_ERR_ATA_BUS;
+		action |= ATA_PORT_SOFTRESET;
+	}
+	if (serror & SERR_PROTOCOL) {
+		*p_err_mask |= AC_ERR_HSM;
+		action |= ATA_PORT_SOFTRESET;
+	}
+	if (serror & SERR_INTERNAL) {
+		*p_err_mask |= AC_ERR_SYSTEM;
+		action |= ATA_PORT_SOFTRESET;
+	}
+
+	return action;
+}
+
+static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
+{
+	if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
+		return 1;
+
+	if (ent->is_io) {
+		if (ent->err_mask & AC_ERR_HSM)
+			return 1;
+		if ((ent->err_mask &
+		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+			return 2;
+	}
+
+	return 0;
+}
+
+struct speed_down_needed_arg {
+	u64 since;
+	int nr_errors[3];
+};
+
+static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
+{
+	struct speed_down_needed_arg *arg = void_arg;
+
+	if (ent->timestamp < arg->since)
+		return -1;
+
+	arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
+	return 0;
+}
+
+/**
+ *	ata_eh_speed_down_needed - Determine whether speed down is necessary
+ *	@dev: Device of interest
+ *
+ *	This function examines error ring of @dev and determines
+ *	whether speed down is necessary.  Speed down is necessary if
+ *	there have been more than 3 Cat-1 errors or more than 10 Cat-2
+ *	errors during the last 15 minutes.
+ *
+ *	Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
+ *	violation for known supported commands.
+ *
+ *	Cat-2 errors are unclassified DEV errors for known supported
+ *	commands.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ *	RETURNS:
+ *	1 if speed down is necessary, 0 otherwise
+ */
+static int ata_eh_speed_down_needed(struct ata_device *dev)
+{
+	const u64 interval = 15LLU * 60 * HZ;
+	static const int err_limits[3] = { -1, 3, 10 };
+	struct speed_down_needed_arg arg;
+	struct ata_ering_entry *ent;
+	int err_cat;
+	u64 j64;
+
+	ent = ata_ering_top(&dev->ering);
+	if (!ent)
+		return 0;
+
+	err_cat = ata_eh_categorize_ering_entry(ent);
+	if (err_cat == 0)
+		return 0;
+
+	memset(&arg, 0, sizeof(arg));
+
+	j64 = get_jiffies_64();
+	if (j64 >= interval)
+		arg.since = j64 - interval;
+	else
+		arg.since = 0;
+
+	ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
+
+	return arg.nr_errors[err_cat] > err_limits[err_cat];
+}
+
+/**
+ *	ata_eh_speed_down - record error and speed down if necessary
+ *	@ap: Host port failed device lives on
+ *	@dev: Failed device
+ *	@is_io: Did the device fail during normal IO?
+ *	@err_mask: err_mask of the error
+ *
+ *	Record error and examine error history to determine whether
+ *	adjusting transmission speed is necessary.  It also sets
+ *	transmission limits appropriately if such adjustment is
+ *	necessary.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	ATA_PORT_* reset action needed to apply new limits, 0 if none
+ */
+static int ata_eh_speed_down(struct ata_port *ap, struct ata_device *dev,
+			     int is_io, unsigned int err_mask)
+{
+	if (!err_mask)
+		return 0;
+
+	/* record error and determine whether speed down is necessary */
+	ata_ering_record(&dev->ering, is_io, err_mask);
+
+	if (!ata_eh_speed_down_needed(dev))
+		return 0;
+
+	/* speed down SATA link speed if possible */
+	if (ata_down_sata_spd_limit(ap) == 0)
+		return ATA_PORT_HARDRESET;
+
+	/* lower transfer mode */
+	if (ata_down_xfermask_limit(ap, dev, 0) == 0)
+		return ATA_PORT_SOFTRESET;
+
+	printk(KERN_ERR "ata%u: dev %u speed down requested but no "
+	       "transfer mode left\n", ap->id, dev->devno);
+	return 0;
+}
+
+/**
+ *	ata_eh_autopsy - analyze error and determine recovery action
+ *	@ap: host port to perform autopsy on
+ *	@qc: failed command
+ *	@tf: taskfile registers to analyze
+ *	@serror: SError value to analyze
+ *
+ *	Analyze why @qc failed and determine which recovery action is
+ *	needed.  This function also sets more detailed AC_ERR_* values
+ *	and fills sense data for ATAPI CHECK SENSE.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	Determined recovery action
+ */
+unsigned int ata_eh_autopsy(struct ata_port *ap, struct ata_queued_cmd *qc,
+			    const struct ata_taskfile *tf, u32 serror)
+{
+	unsigned int action = 0;
+
+	if (ap->flags & ATA_FLAG_FROZEN)
+		action |= ATA_PORT_SOFTRESET;
+
+	if (!qc)
+		return action;
+
+	if (qc->err_mask & AC_ERR_TIMEOUT)
+		action |= ATA_PORT_SOFTRESET;
+
+	/* determine cause of failure. */
+	action |= ata_eh_analyze_tf(qc, tf);
+	action |= ata_eh_analyze_serror(serror, &qc->err_mask);
+	action |= ata_eh_speed_down(ap, qc->dev, qc->flags & ATA_QCFLAG_IO,
+				    qc->err_mask);
+
+	/* DEV errors are probably spurious in case of ATA_BUS error */
+	if (qc->err_mask & AC_ERR_ATA_BUS)
+		qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | AC_ERR_INVALID);
+
+	if (qc->err_mask)
+		action |= ATA_PORT_REVALIDATE;
+
+	return action;
+}
+
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 789dd75..f65bde5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -686,6 +686,9 @@ extern void ata_eh_qc_complete(struct at
 extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
 extern struct ata_queued_cmd * ata_eh_determine_qc(struct ata_port *ap,
 						   struct ata_taskfile *tf);
+extern unsigned int ata_eh_autopsy(struct ata_port *ap,
+				   struct ata_queued_cmd *qc,
+				   const struct ata_taskfile *tf, u32 serror);
 
 
 static inline int
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 13/13] ahci: convert to new EH
  2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
                   ` (6 preceding siblings ...)
  2006-04-03  3:44 ` [PATCH 06/13] libata: implement ata_eh_autopsy() Tejun Heo
@ 2006-04-03  3:44 ` Tejun Heo
  2006-04-03  3:44 ` [PATCH 11/13] ata_piix: " Tejun Heo
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  3:44 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide; +Cc: Tejun Heo

Convert AHCI to new EH.  Unfortunately, ICH7 AHCI reacts badly if IRQ
mask is diddled during operation.  So, freezing is implemented by
unconditionally clearing interrupt conditions while frozen.

* AHCI interrupt handler does not analyze any of the error conditions.
  It just records relevant status registers in the driver private area
  and invokes EH.  EH is responsible for decoding all that information.

* Interrupts are categorized according to required action.
  e.g. Connection status or unknown FIS error requires freezing the
  port while TF or HBUS_DATA don't.

* Only CONNECT (reflects SErr.X) interrupt is taken into account, not
  PHYRDY (SErr.N), as CONNECT is a better cue for starting EH.

* AHCI EH may be invoked without any active command.  e.g. CONNECT irq
  occurring while no qc is in progress still triggers EH and will reset
  the port and revalidate attached device.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/ahci.c |  235 ++++++++++++++++++++++++++++++++-------------------
 1 files changed, 147 insertions(+), 88 deletions(-)

d3b0304d82c87014e4f6d589d1671215e5ac7347
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index 8403cf3..21aea0b 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -71,6 +71,7 @@ enum {
 	AHCI_CMD_CLR_BUSY	= (1 << 10),
 
 	RX_FIS_D2H_REG		= 0x40,	/* offset of D2H Register FIS data */
+	RX_FIS_UNK		= 0x60, /* offset of Unknown FIS data */
 
 	board_ahci		= 0,
 
@@ -127,15 +128,16 @@ enum {
 	PORT_IRQ_PIOS_FIS	= (1 << 1), /* PIO Setup FIS rx'd */
 	PORT_IRQ_D2H_REG_FIS	= (1 << 0), /* D2H Register FIS rx'd */
 
-	PORT_IRQ_FATAL		= PORT_IRQ_TF_ERR |
-				  PORT_IRQ_HBUS_ERR |
-				  PORT_IRQ_HBUS_DATA_ERR |
-				  PORT_IRQ_IF_ERR,
-	DEF_PORT_IRQ		= PORT_IRQ_FATAL | PORT_IRQ_PHYRDY |
-				  PORT_IRQ_CONNECT | PORT_IRQ_SG_DONE |
-				  PORT_IRQ_UNK_FIS | PORT_IRQ_SDB_FIS |
-				  PORT_IRQ_DMAS_FIS | PORT_IRQ_PIOS_FIS |
-				  PORT_IRQ_D2H_REG_FIS,
+	PORT_IRQ_FREEZE		= PORT_IRQ_HBUS_ERR |
+				  PORT_IRQ_IF_ERR |
+				  PORT_IRQ_CONNECT |
+				  PORT_IRQ_UNK_FIS,
+	PORT_IRQ_ERROR		= PORT_IRQ_FREEZE |
+				  PORT_IRQ_TF_ERR |
+				  PORT_IRQ_HBUS_DATA_ERR,
+	DEF_PORT_IRQ		= PORT_IRQ_ERROR | PORT_IRQ_SG_DONE |
+				  PORT_IRQ_SDB_FIS | PORT_IRQ_DMAS_FIS |
+				  PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS,
 
 	/* PORT_CMD bits */
 	PORT_CMD_ATAPI		= (1 << 24), /* Device is ATAPI */
@@ -184,6 +186,9 @@ struct ahci_port_priv {
 	struct ahci_sg		*cmd_tbl_sg;
 	void			*rx_fis;
 	dma_addr_t		rx_fis_dma;
+	/* register values stored by interrupt handler for EH */
+	u32			eh_irq_stat;
+	u32			eh_serror;
 };
 
 static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -193,13 +198,13 @@ static unsigned int ahci_qc_issue(struct
 static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
 static int ahci_probe_reset(struct ata_port *ap, unsigned int *classes);
 static void ahci_irq_clear(struct ata_port *ap);
-static void ahci_eng_timeout(struct ata_port *ap);
+static void ahci_error_handler(struct ata_port *ap);
+static void ahci_post_internal_cmd(struct ata_queued_cmd *qc);
 static int ahci_port_start(struct ata_port *ap);
 static void ahci_port_stop(struct ata_port *ap);
 static void ahci_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
 static void ahci_qc_prep(struct ata_queued_cmd *qc);
 static u8 ahci_check_status(struct ata_port *ap);
-static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
 static void ahci_remove_one (struct pci_dev *pdev);
 
 static struct scsi_host_template ahci_sht = {
@@ -234,7 +239,8 @@ static const struct ata_port_operations 
 	.qc_prep		= ahci_qc_prep,
 	.qc_issue		= ahci_qc_issue,
 
-	.eng_timeout		= ahci_eng_timeout,
+	.error_handler		= ahci_error_handler,
+	.post_internal_cmd	= ahci_post_internal_cmd,
 
 	.irq_handler		= ahci_interrupt,
 	.irq_clear		= ahci_irq_clear,
@@ -779,108 +785,163 @@ static void ahci_qc_prep(struct ata_queu
 	ahci_fill_cmd_slot(pp, opts);
 }
 
-static void ahci_restart_port(struct ata_port *ap, u32 irq_stat)
+static unsigned int ahci_eh_autopsy(struct ata_port *ap,
+				    struct ata_queued_cmd *qc, u32 irq_stat,
+				    char *desc, size_t desc_sz)
 {
-	void __iomem *mmio = ap->host_set->mmio_base;
-	void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
-	u32 tmp;
+	struct ahci_port_priv *pp = ap->private_data;
+	unsigned int err_mask = 0, action = 0;
+	int rc;
 
-	if ((ap->device[0].class != ATA_DEV_ATAPI) ||
-	    ((irq_stat & PORT_IRQ_TF_ERR) == 0))
-		printk(KERN_WARNING "ata%u: port reset, "
-		       "p_is %x is %x pis %x cmd %x tf %x ss %x se %x\n",
-			ap->id,
-			irq_stat,
-			readl(mmio + HOST_IRQ_STAT),
-			readl(port_mmio + PORT_IRQ_STAT),
-			readl(port_mmio + PORT_CMD),
-			readl(port_mmio + PORT_TFDATA),
-			readl(port_mmio + PORT_SCR_STAT),
-			readl(port_mmio + PORT_SCR_ERR));
+	rc = scnprintf(desc, desc_sz, "irq_stat 0x%08x", irq_stat);
+	desc += rc;
+	desc_sz -= rc;
 
-	/* stop DMA */
-	ahci_stop_engine(ap);
+	if (irq_stat & PORT_IRQ_TF_ERR)
+		err_mask |= AC_ERR_DEV;
 
-	/* clear SATA phy error, if any */
-	tmp = readl(port_mmio + PORT_SCR_ERR);
-	writel(tmp, port_mmio + PORT_SCR_ERR);
+	if (irq_stat & (PORT_IRQ_HBUS_ERR | PORT_IRQ_HBUS_DATA_ERR)) {
+		err_mask |= AC_ERR_HOST_BUS;
+		action |= ATA_PORT_SOFTRESET;
+	}
 
-	/* if DRQ/BSY is set, device needs to be reset.
-	 * if so, issue COMRESET
-	 */
-	tmp = readl(port_mmio + PORT_TFDATA);
-	if (tmp & (ATA_BUSY | ATA_DRQ)) {
-		writel(0x301, port_mmio + PORT_SCR_CTL);
-		readl(port_mmio + PORT_SCR_CTL); /* flush */
-		udelay(10);
-		writel(0x300, port_mmio + PORT_SCR_CTL);
-		readl(port_mmio + PORT_SCR_CTL); /* flush */
+	if (irq_stat & PORT_IRQ_IF_ERR) {
+		err_mask |= AC_ERR_ATA_BUS;
+		action |= ATA_PORT_SOFTRESET;
+		rc = scnprintf(desc, desc_sz, ", Interface fatal error");
+		desc += rc;
+		desc_sz -= rc;
 	}
 
-	/* re-start DMA */
-	ahci_start_engine(ap);
+	if (irq_stat & PORT_IRQ_CONNECT) {
+		err_mask |= AC_ERR_ATA_BUS;
+		action |= ATA_PORT_SOFTRESET;
+		rc = scnprintf(desc, desc_sz, ", Connection status changed");
+		desc += rc;
+		desc_sz -= rc;
+	}
+
+	if (irq_stat & PORT_IRQ_UNK_FIS) {
+		unsigned int *unk = (unsigned int *)(pp->rx_fis + RX_FIS_UNK);
+
+		err_mask |= AC_ERR_HSM;
+		action |= ATA_PORT_SOFTRESET;
+		rc = scnprintf(desc, desc_sz,
+			       ", Unknown FIS %08x %08x %08x %08x",
+			       unk[0], unk[1], unk[2], unk[3]);
+		desc += rc;
+		desc_sz -= rc;
+	}
+
+	if (qc)
+		qc->err_mask |= err_mask;
+
+	return action;
 }
 
-static void ahci_eng_timeout(struct ata_port *ap)
+static void ahci_error_handler(struct ata_port *ap)
 {
-	struct ata_host_set *host_set = ap->host_set;
-	void __iomem *mmio = host_set->mmio_base;
-	void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
+	struct ahci_port_priv *pp = ap->private_data;
+	unsigned int action = 0;
+	u32 irq_stat, serror;
+	struct ata_taskfile tf;
 	struct ata_queued_cmd *qc;
-	unsigned long flags;
+	char desc[70] = "";
+
+	/* fetch & clear error information from interrupt handler */
+	irq_stat = pp->eh_irq_stat;
+	serror = pp->eh_serror;
 
-	printk(KERN_WARNING "ata%u: handling error/timeout\n", ap->id);
+	pp->eh_irq_stat = 0;
+	pp->eh_serror = 0;
 
-	spin_lock_irqsave(&host_set->lock, flags);
+	if (!(ap->flags & ATA_FLAG_FROZEN)) {
+		/* restart engine */
+		ahci_stop_engine(ap);
+		ahci_start_engine(ap);
+	}
+
+	/* perform recovery */
+	qc = ata_eh_determine_qc(ap, &tf);
+
+	action |= ahci_eh_autopsy(ap, qc, irq_stat, desc, sizeof(desc));
+	action |= ata_eh_autopsy(ap, qc, &tf, serror);
+	ata_eh_report(ap, qc, &tf, serror, action, desc);
+	ata_eh_revive(ap, action,
+		      ahci_softreset, ahci_hardreset, ahci_postreset);
+	ata_eh_finish_qcs(ap, qc, &tf);
+}
+
+static void ahci_post_internal_cmd(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct ahci_port_priv *pp = ap->private_data;
 
-	ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
-	qc = ata_qc_from_tag(ap, ap->active_tag);
-	qc->err_mask |= AC_ERR_TIMEOUT;
+	if (qc->flags & ATA_QCFLAG_FAILED)
+		qc->err_mask |= AC_ERR_OTHER;
 
-	spin_unlock_irqrestore(&host_set->lock, flags);
+	if (qc->err_mask) {
+		/* make DMA engine forget about the failed command */
+		ahci_stop_engine(ap);
+		ahci_start_engine(ap);
+	}
 
-	ata_eh_qc_complete(qc);
+	pp->eh_irq_stat = 0;
+	pp->eh_serror = 0;
 }
 
-static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
+static inline int ahci_host_intr(struct ata_port *ap)
 {
+	struct ahci_port_priv *pp = ap->private_data;
 	void __iomem *mmio = ap->host_set->mmio_base;
 	void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
-	u32 status, serr, ci;
-
-	serr = readl(port_mmio + PORT_SCR_ERR);
-	writel(serr, port_mmio + PORT_SCR_ERR);
+	u32 status, serror, ci;
+	unsigned int eh_flags;
 
 	status = readl(port_mmio + PORT_IRQ_STAT);
 	writel(status, port_mmio + PORT_IRQ_STAT);
 
-	ci = readl(port_mmio + PORT_CMD_ISSUE);
-	if (likely((ci & 0x1) == 0)) {
-		if (qc) {
-			WARN_ON(qc->err_mask);
-			ata_qc_complete(qc);
-			qc = NULL;
-		}
+	/* AHCI gets unhappy if IRQ mask is diddled with while the
+	 * port is active, so we cannot disable IRQ when freezing.
+	 * Clear IRQ conditions and hope screaming IRQs don't happen.
+	 */
+	if (ap->flags & ATA_FLAG_FROZEN) {
+		scr_write(ap, SCR_ERROR, scr_read(ap, SCR_ERROR));
+		return 1;
 	}
 
-	if (status & PORT_IRQ_FATAL) {
-		unsigned int err_mask;
-		if (status & PORT_IRQ_TF_ERR)
-			err_mask = AC_ERR_DEV;
-		else if (status & PORT_IRQ_IF_ERR)
-			err_mask = AC_ERR_ATA_BUS;
-		else
-			err_mask = AC_ERR_HOST_BUS;
-
-		/* command processing has stopped due to error; restart */
-		ahci_restart_port(ap, status);
-
-		if (qc) {
-			qc->err_mask |= err_mask;
-			ata_qc_complete(qc);
+	if (!(status & PORT_IRQ_ERROR)) {
+		struct ata_queued_cmd *qc;
+
+		if ((qc = ata_qc_from_tag(ap, ap->active_tag))) {
+			ci = readl(port_mmio + PORT_CMD_ISSUE);
+			if ((ci & 0x1) == 0) {
+				ata_qc_complete(qc);
+				return 1;
+			}
 		}
+
+		if (ata_ratelimit())
+			printk(KERN_INFO "ata%u: spurious interrupt "
+			       "(irq_stat 0x%x active_tag %d)\n",
+			       ap->id, status, ap->active_tag);
+
+		return 1;
 	}
 
+	/* Something weird is going on.  Hand over to EH. */
+	serror = scr_read(ap, SCR_ERROR);
+	scr_write(ap, SCR_ERROR, serror);
+
+	pp->eh_irq_stat = status;
+	pp->eh_serror = serror;
+
+	eh_flags = ATA_EH_ABORT;
+	if (status & PORT_IRQ_FREEZE)
+		eh_flags |= ATA_EH_FREEZE;
+
+	ata_eh_schedule_port(ap, eh_flags);
+
 	return 1;
 }
 
@@ -918,9 +979,7 @@ static irqreturn_t ahci_interrupt (int i
 
 		ap = host_set->ports[i];
 		if (ap) {
-			struct ata_queued_cmd *qc;
-			qc = ata_qc_from_tag(ap, ap->active_tag);
-			if (!ahci_host_intr(ap, qc))
+			if (!ahci_host_intr(ap))
 				if (ata_ratelimit())
 					dev_printk(KERN_WARNING, host_set->dev,
 					  "unhandled interrupt on port %u\n",
@@ -942,7 +1001,7 @@ static irqreturn_t ahci_interrupt (int i
 		handled = 1;
 	}
 
-        spin_unlock(&host_set->lock);
+	spin_unlock(&host_set->lock);
 
 	VPRINTK("EXIT\n");
 
-- 
1.2.4



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 08/13] libata: implement ata_eh_revive()
  2006-04-03  3:44 ` [PATCH 08/13] libata: implement ata_eh_revive() Tejun Heo
@ 2006-04-03  7:42   ` Tejun Heo
  0 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2006-04-03  7:42 UTC (permalink / raw)
  To: jgarzik, alan, albertcc, linux-ide

Implement EH helper function ata_eh_revive().  This function executes
what ata_eh_autopsy() and other parts of EH determined necessary to
resurrect the port.  As in ata_bus_probe(), each device is given a fixed
number (ATA_EH_MAX_TRIES) of chances.  If a device uses up all its
chances and still fails to recover, it gets disabled.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

In the original patch, ap->ops->set_mode() wasn't handled and a newline
was missing in the comment.  This patch fixes both.

Thanks.

 drivers/scsi/libata-core.c |    1
 drivers/scsi/libata-eh.c   |  163 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    4 +
 3 files changed, 168 insertions(+)

Index: work/drivers/scsi/libata-core.c
===================================================================
--- work.orig/drivers/scsi/libata-core.c	2006-04-03 16:37:20.000000000 +0900
+++ work/drivers/scsi/libata-core.c	2006-04-03 16:37:32.000000000 +0900
@@ -5282,3 +5282,4 @@ EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
 EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
 EXPORT_SYMBOL_GPL(ata_eh_autopsy);
 EXPORT_SYMBOL_GPL(ata_eh_report);
+EXPORT_SYMBOL_GPL(ata_eh_revive);
Index: work/drivers/scsi/libata-eh.c
===================================================================
--- work.orig/drivers/scsi/libata-eh.c	2006-04-03 16:37:20.000000000 +0900
+++ work/drivers/scsi/libata-eh.c	2006-04-03 16:38:14.000000000 +0900
@@ -880,3 +880,166 @@ void ata_eh_report(struct ata_port *ap, 
 	       tf->command, tf->feature, serror, action,
 	       desc_head, desc, desc_tail);
 }
+
+/**
+ *	ata_eh_revive - revive host port after error
+ *	@ap: host port to revive
+ *	@action: action to perform to revive @ap
+ *	@softreset: softreset method (can be NULL)
+ *	@hardreset: hardreset method (can be NULL)
+ *	@postreset: postreset method (can be NULL)
+ *
+ *	Perform action specified by @action to revive host port @ap
+ *	after error.  Returns 0 or, if reset keeps failing, its error code.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+int ata_eh_revive(struct ata_port *ap, unsigned int action,
+		  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+		  ata_postreset_fn_t postreset)
+{
+	unsigned int classes[ATA_MAX_DEVICES];
+	int tries[ATA_MAX_DEVICES], reset_tries;
+	int scr_valid = ap->flags & ATA_FLAG_SATA && ap->ops->scr_read;
+	struct ata_device *dev;
+	ata_reset_fn_t reset;
+	int i, down_xfermask, rc = 0;
+
+	if (!action)
+		goto out;
+
+	reset_tries = ATA_EH_MAX_TRIES;
+	for (i = 0; i < ATA_MAX_DEVICES; i++)
+		tries[i] = ATA_EH_MAX_TRIES;
+
+	/* revalidate */
+	if (action == ATA_PORT_REVALIDATE) {
+		for (i = 0; i < ATA_MAX_DEVICES; i++) {
+			dev = &ap->device[i];
+			if (!ata_dev_enabled(dev) ||
+			    !(dev->flags & ATA_DFLAG_FAILED))
+				continue;
+			if (ata_dev_revalidate(ap, dev, 0))
+				break;
+		}
+		if (i == ATA_MAX_DEVICES)
+			goto out;
+
+		action |= ATA_PORT_SOFTRESET;
+	}
+	action &= ~ATA_PORT_REVALIDATE;
+
+	/* Give devices time to get ready before trying the first
+	 * reset.  Without this, devices tend to fail the first reset
+	 * under certain circumstances and cause much longer delay.
+	 */
+	if (scr_valid && sata_dev_present(ap)) {
+		unsigned long timeout = jiffies + 5 * HZ;
+		while (time_before(jiffies, timeout) &&
+		       ata_chk_status(ap) & ATA_BUSY)
+			ssleep(1);
+	}
+
+	if (softreset && (!hardreset || (!ata_set_sata_spd_needed(ap) &&
+					 action == ATA_PORT_SOFTRESET)))
+		reset = softreset;
+	else
+		reset = hardreset;
+
+ retry:
+	down_xfermask = 0;
+
+	/* reset.  postreset is responsible for thawing the port. */
+	printk(KERN_INFO "ata%u: %s resetting channel for error handling\n",
+	       ap->id, reset == softreset ? "soft" : "hard");
+
+	rc = ata_do_reset(ap, reset, postreset, 1, classes);
+	if (rc)
+		goto fail_reset;
+
+	/* revalidate and reconfigure devices */
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+
+		if (!tries[i])
+			ata_dev_disable(ap, dev);
+
+		if (!ata_dev_enabled(dev))
+			continue;
+
+		rc = ata_dev_revalidate(ap, dev, 1);
+		if (rc)
+			goto fail;
+	}
+
+	/* configure transfer mode */
+	if (ap->ops->set_mode) {
+		/* FIXME: make ->set_mode handle no device case and
+		 * return error code and failing device on failure as
+		 * ata_set_mode() does.
+		 */
+		for (i = 0; i < ATA_MAX_DEVICES; i++)
+			if (ata_dev_enabled(&ap->device[i])) {
+				ap->ops->set_mode(ap);
+				break;
+			}
+		rc = 0;
+	} else {
+		rc = ata_set_mode(ap, &dev);
+		if (rc) {
+			down_xfermask = 1;
+			goto fail;
+		}
+	}
+
+	goto out;
+
+ fail_reset:
+	if (!--reset_tries)
+		goto out;
+	if (reset == hardreset)
+		ata_down_sata_spd_limit(ap);
+	if (hardreset)
+		reset = hardreset;
+
+	printk(KERN_WARNING "ata%u: reset failed, will retry in 5 secs\n",
+	       ap->id);
+	ssleep(5);
+	goto retry;
+
+ fail:
+	switch (rc) {
+	case -EINVAL:
+	case -ENODEV:
+		tries[dev->devno] = 0;
+		break;
+	case -EIO:
+		ata_down_sata_spd_limit(ap);
+		/* fall through - an I/O error also costs the device a try */
+	default:
+		tries[dev->devno]--;
+		if (down_xfermask &&
+		    ata_down_xfermask_limit(ap, dev, tries[dev->devno] == 1))
+			tries[dev->devno] = 0;
+	}
+
+	if (hardreset)
+		reset = hardreset;
+
+	printk(KERN_WARNING "ata%u: some devices seem to be offline, will "
+	       "retry in 5 secs\n", ap->id);
+	ssleep(5);
+	goto retry;
+
+ out:
+	/* clear failure marks; a failed revive disables every device */
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+		dev->flags &= ~ATA_DFLAG_FAILED;
+		if (rc)
+			ata_dev_disable(ap, dev);
+	}
+
+	return rc;
+}
Index: work/include/linux/libata.h
===================================================================
--- work.orig/include/linux/libata.h	2006-04-03 16:37:20.000000000 +0900
+++ work/include/linux/libata.h	2006-04-03 16:37:32.000000000 +0900
@@ -236,6 +236,7 @@ enum {
 
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
+	ATA_EH_MAX_TRIES	= 3,
 };
 
 enum hsm_task_states {
@@ -692,6 +693,9 @@ extern unsigned int ata_eh_autopsy(struc
 extern void ata_eh_report(struct ata_port *ap, struct ata_queued_cmd *qc,
 			  const struct ata_taskfile *tf, u32 serror,
 			  unsigned int action, const char *desc);
+extern int ata_eh_revive(struct ata_port *ap, unsigned int action,
+			 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+			 ata_postreset_fn_t postreset);
 
 
 static inline int

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2006-04-03  7:42 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-04-03  3:44 [PATCHSET] new EH implementation Tejun Heo
2006-04-03  3:44 ` [PATCH 01/13] libata: add constants and flags to be used by EH Tejun Heo
2006-04-03  3:44 ` [PATCH 05/13] libata: implement ata_eh_determine_qc() Tejun Heo
2006-04-03  3:44 ` [PATCH 04/13] libata: implement EH utility functions Tejun Heo
2006-04-03  3:44 ` [PATCH 02/13] libata: implement ata_ering Tejun Heo
2006-04-03  3:44 ` [PATCH 03/13] libata: add per-dev ata_ering Tejun Heo
2006-04-03  3:44 ` [PATCH 07/13] libata: implement ata_eh_report() Tejun Heo
2006-04-03  3:44 ` [PATCH 06/13] libata: implement ata_eh_autopsy() Tejun Heo
2006-04-03  3:44 ` [PATCH 13/13] ahci: convert to new EH Tejun Heo
2006-04-03  3:44 ` [PATCH 11/13] ata_piix: " Tejun Heo
2006-04-03  3:44 ` [PATCH 08/13] libata: implement ata_eh_revive() Tejun Heo
2006-04-03  7:42   ` Tejun Heo
2006-04-03  3:44 ` [PATCH 12/13] sata_sil: convert to new EH Tejun Heo
2006-04-03  3:44 ` [PATCH 09/13] libata: implement ata_eh_finish_qcs() Tejun Heo
2006-04-03  3:44 ` [PATCH 10/13] libata: implement EH methods for BMDMA controllers Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).