linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <htejun@gmail.com>
To: jgarzik@pobox.com, alan@lxorguk.ukuu.org.uk, axboe@suse.de,
	albertcc@tw.ibm.com, lkosewsk@gmail.com,
	linux-ide@vger.kernel.org
Cc: Tejun Heo <htejun@gmail.com>
Subject: [PATCH 08/14] libata-eh: implement ata_eh_revive()
Date: Tue, 11 Apr 2006 22:48:22 +0900	[thread overview]
Message-ID: <11447633023982-git-send-email-htejun@gmail.com> (raw)
In-Reply-To: <11447633013561-git-send-email-htejun@gmail.com>

Implement EH helper function ata_eh_revive().  This function executes
what ata_eh_autopsy() and other parts of EH determined necessary to
resurrect the port.  As in ata_bus_probe(), each device is given a fixed
number (ATA_EH_MAX_TRIES) of chances.  If a device uses up all its
chances and still fails to recover, it gets disabled.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |    1 
 drivers/scsi/libata-eh.c   |  206 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |    4 +
 3 files changed, 211 insertions(+), 0 deletions(-)

98acfd8b52af7a85887b7a38cadb9267a808bebc
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index e724a76..cb174cf 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5345,3 +5345,4 @@ EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
 EXPORT_SYMBOL_GPL(ata_eh_determine_qc);
 EXPORT_SYMBOL_GPL(ata_eh_autopsy);
 EXPORT_SYMBOL_GPL(ata_eh_report);
+EXPORT_SYMBOL_GPL(ata_eh_revive);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index eebb165..1d25d55 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -918,3 +918,209 @@ void ata_eh_report(struct ata_port *ap, 
 	       tf->command, tf->feature, serror, action,
 	       desc_head, desc, desc_tail);
 }
+
+static void ata_eh_wait_before_reset(struct ata_port *ap)
+{
+	int scr_valid = ap->cbl == ATA_CBL_SATA && ap->ops->scr_read;
+	unsigned long timeout;
+
+	/* Give devices time to get ready before trying the first reset;
+	 * without this they tend to fail it under certain circumstances
+	 * and cause a much longer delay.  Sleep 1s, then poll (100ms steps)
+	 * until a 5s deadline that is taken before the sleep. */
+	timeout = jiffies + 5 * HZ;
+	ssleep(1);
+	/* SCRs readable: poll SStatus until its low nibble is no longer 0x1 */
+	if (scr_valid) {
+		while (time_before(jiffies, timeout)) {
+			if ((scr_read(ap, SCR_STATUS) & 0xf) != 0x1)
+				break;
+			msleep(100);
+		}
+	}
+	/* no SCR access, or a device is present: poll until ATA_BUSY clears */
+	if (!scr_valid || sata_dev_present(ap)) {
+		while (time_before(jiffies, timeout)) {
+			if (!(ata_chk_status(ap) & ATA_BUSY))
+				break;
+			msleep(100);
+		}
+	}
+}
+
+/**
+ *	ata_eh_revive - revive host port after error
+ *	@ap: host port to revive
+ *	@action: action to perform to revive @ap
+ *	@softreset: softreset method (can be NULL)
+ *	@hardreset: hardreset method (can be NULL)
+ *	@postreset: postreset method (can be NULL)
+ *
+ *	Perform action specified by @action to revive host port @ap after
+ *	error.  Returns 0, or the last reset error with all devices disabled.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+int ata_eh_revive(struct ata_port *ap, unsigned int action,
+		  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+		  ata_postreset_fn_t postreset)
+{
+	int scr_valid = ap->cbl == ATA_CBL_SATA && ap->ops->scr_read;
+	unsigned int classes[ATA_MAX_DEVICES];
+	int tries[ATA_MAX_DEVICES], reset_tries, nr_enabled;
+	struct ata_device *dev;
+	ata_reset_fn_t reset;
+	int i, down_xfermask, rc = 0;
+
+	if (!action)
+		goto out;	/* nothing requested; rc is still 0 */
+
+	reset_tries = ATA_EH_MAX_TRIES;
+	nr_enabled = 0;
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		tries[i] = ATA_EH_MAX_TRIES;
+		if (ata_dev_enabled(&ap->device[i]))
+			nr_enabled++;
+	}
+
+	/* revalidate-only request: escalate to softreset if any device fails */
+	if (action == ATA_PORT_REVALIDATE) {
+		for (i = 0; i < ATA_MAX_DEVICES; i++) {
+			struct ata_device *dev = &ap->device[i];	/* NOTE(review): shadows function-scope dev */
+			if (!ata_dev_enabled(dev) ||
+			    !(dev->flags & ATA_DFLAG_FAILED))
+				continue;
+			if (ata_dev_revalidate(ap, dev, 0))
+				break;
+		}
+		if (i == ATA_MAX_DEVICES) {	/* loop ran to completion: no failure */
+			rc = 0;
+			goto out;
+		}
+
+		action |= ATA_PORT_SOFTRESET;
+	}
+	action &= ~ATA_PORT_REVALIDATE;
+
+	/* Skip reset if no device is enabled and the port is not frozen. */
+	if (!nr_enabled && !(ap->flags & ATA_FLAG_FROZEN))
+		goto out;
+
+	/* give devices some time to breathe */
+	ata_eh_wait_before_reset(ap);
+	/* choose the reset method used for the first attempt */
+	if (softreset && (!hardreset || (!ata_set_sata_spd_needed(ap) &&
+					 action == ATA_PORT_SOFTRESET)))
+		reset = softreset;
+	else
+		reset = hardreset;	/* NOTE(review): NULL if both methods are NULL */
+
+ retry:
+	down_xfermask = 0;
+
+	/* reset.  postreset is responsible for thawing the port. */
+	printk("ata%u: %s resetting channel for error handling\n",
+	       ap->id, reset == softreset ? "soft" : "hard");
+
+	rc = ata_do_reset(ap, reset, postreset, classes);
+	if (rc)
+		goto fail_reset;
+
+	/* revalidate and reconfigure devices */
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+
+		if (!ata_dev_enabled(dev))
+			continue;
+		/* sata_dev_present() says no device: fail this one with -EIO */
+		if (scr_valid && !sata_dev_present(ap)) {
+			rc = -EIO;
+			goto fail;
+		}
+
+		rc = ata_dev_revalidate(ap, dev, 1);
+		if (rc)
+			goto fail;
+	}
+
+	/* configure transfer mode */
+	if (ap->ops->set_mode) {
+		/* FIXME: make ->set_mode handle no device case and
+		 * return error code and failing device on failure as
+		 * ata_set_mode() does.
+		 */
+		for (i = 0; i < ATA_MAX_DEVICES; i++)
+			if (ata_dev_enabled(&ap->device[i])) {
+				ap->ops->set_mode(ap);
+				break;
+			}
+		rc = 0;
+	} else
+		rc = ata_set_mode(ap, &dev);	/* presumably sets dev to the failing device */
+
+	if (rc) {
+		down_xfermask = 1;
+		goto fail;
+	}
+
+	goto out;
+	/* the reset itself failed: bounded retries, switching to hardreset if any */
+ fail_reset:
+	if (!--reset_tries) {
+		printk(KERN_ERR "ata%u: EH reset failed, giving up\n", ap->id);
+		goto out;
+	}
+	if (reset == hardreset)
+		ata_down_sata_spd_limit(ap);
+	if (hardreset)
+		reset = hardreset;
+
+	printk(KERN_WARNING "ata%u: EH reset failed, will retry in 5 secs\n",
+	       ap->id);
+	ssleep(5);
+	goto retry;
+	/* revalidation or mode setting failed; dev is the device charged for it */
+ fail:
+	switch (rc) {
+	case -EINVAL:
+	case -ENODEV:
+		tries[dev->devno] = 0;	/* give up on dev immediately */
+		break;
+	case -EIO:
+		ata_down_sata_spd_limit(ap);	/* fall through */
+	default:
+		tries[dev->devno]--;
+		if (down_xfermask &&
+		    ata_down_xfermask_limit(ap, dev, tries[dev->devno] == 1))
+			tries[dev->devno] = 0;
+	}
+
+	if (!tries[dev->devno]) {
+		ata_dev_disable(ap, dev);
+		nr_enabled--;
+	}
+
+	if (nr_enabled) {
+		printk(KERN_WARNING "ata%u: some devices seem to be offline, "
+		       "will retry in 5 secs\n", ap->id);
+		ssleep(5);
+	} else {
+		/* no device left, repeat fast */
+		msleep(500);
+	}
+
+	if (hardreset)
+		reset = hardreset;
+	goto retry;
+	/* common exit: clear FAILED flags; on error also disable every device */
+ out:
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+		dev->flags &= ~ATA_DFLAG_FAILED;
+		if (rc)
+			ata_dev_disable(ap, dev);
+	}
+
+	return rc;
+}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5efadab..22472f6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -238,6 +238,7 @@ enum {
 
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
+	ATA_EH_MAX_TRIES	= 3,
 };
 
 enum hsm_task_states {
@@ -696,6 +697,9 @@ extern unsigned int ata_eh_autopsy(struc
 extern void ata_eh_report(struct ata_port *ap, struct ata_queued_cmd *qc,
 			  const struct ata_taskfile *tf, u32 serror,
 			  unsigned int action, const char *desc);
+extern int ata_eh_revive(struct ata_port *ap, unsigned int action,
+			 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+			 ata_postreset_fn_t postreset);
 
 
 static inline int
-- 
1.2.4



  parent reply	other threads:[~2006-04-11 13:48 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-11 13:48 [PATCHSET 6/9] new EH implementation, take 2 Tejun Heo
2006-04-11 13:48 ` [PATCH 05/14] libata-eh: implement ata_eh_determine_qc() Tejun Heo
2006-04-11 13:48 ` [PATCH 06/14] libata-eh: implement ata_eh_autopsy() Tejun Heo
2006-04-11 13:48 ` [PATCH 11/14] ata_piix: convert to new EH Tejun Heo
2006-04-11 13:48 ` [PATCH 02/14] libata-eh: implement ata_ering Tejun Heo
2006-04-11 13:48 ` [PATCH 09/14] libata-eh: implement ata_eh_finish_qcs() Tejun Heo
2006-04-11 13:48 ` [PATCH 01/14] libata-eh: add constants and flags to be used by EH Tejun Heo
2006-04-11 13:48 ` [PATCH 04/14] libata-eh: implement EH utility functions Tejun Heo
2006-04-11 13:48 ` [PATCH 10/14] libata-eh: implement EH methods for BMDMA controllers Tejun Heo
2006-04-11 13:48 ` Tejun Heo [this message]
2006-04-19  9:08   ` [PATCH 08/14] libata-eh: implement ata_eh_revive() zhao, forrest
2006-04-19 10:33     ` Tejun Heo
2006-04-11 13:48 ` [PATCH 07/14] libata-eh: implement ata_eh_report() Tejun Heo
2006-04-11 13:48 ` [PATCH 03/14] libata-eh: add per-dev ata_ering Tejun Heo
2006-04-11 13:48 ` [PATCH 13/14] ahci: convert to new EH Tejun Heo
2006-04-20  6:01   ` zhao, forrest
2006-04-20  7:11     ` Tejun Heo
2006-04-20  7:44       ` Jeff Garzik
2006-04-21  1:34         ` Tejun Heo
2006-04-20  9:26   ` zhao, forrest
2006-04-21  1:20     ` Tejun Heo
2006-04-11 13:48 ` [PATCH 12/14] sata_sil: " Tejun Heo
2006-04-11 13:48 ` [PATCH 14/14] sata_sil24: " Tejun Heo
2006-04-27  9:16 ` [PATCHSET 6/9] new EH implementation, take 2 Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11447633023982-git-send-email-htejun@gmail.com \
    --to=htejun@gmail.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=albertcc@tw.ibm.com \
    --cc=axboe@suse.de \
    --cc=jgarzik@pobox.com \
    --cc=linux-ide@vger.kernel.org \
    --cc=lkosewsk@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).