All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <htejun@gmail.com>
To: jgarzik@pobox.com, alan@lxorguk.ukuu.org.uk, axboe@suse.de,
	albertcc@tw.ibm.com, forrest.zhao@intel.com, efalk@google.com,
	linux-ide@vger.kernel.org
Cc: Tejun Heo <htejun@gmail.com>
Subject: [PATCH 09/11] libata-eh-fw: update ata_scsi_error() for new EH
Date: Thu, 11 May 2006 21:27:24 +0900	[thread overview]
Message-ID: <11473504443148-git-send-email-htejun@gmail.com> (raw)
In-Reply-To: <11473504433328-git-send-email-htejun@gmail.com>

Update ata_scsi_error() for new EH.  ata_scsi_error() is responsible
for claiming timed out qcs and invoking ->error_handler in safe and
synchronized manner.  As the state of the controller is unknown if a
qc has timed out, the port is frozen in such cases.

Note that ata_scsi_timed_out() isn't used for new EH.  This is because
a timed out qc cannot be claimed by EH without freezing the port and
freezing the port in ata_scsi_timed_out() results in unnecessary
abortion of other active qcs.  ata_scsi_timed_out() can be removed
once all drivers are converted to new EH.

While at it, add 'TODO: kill' comments to old EH functions.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-eh.c |  136 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/libata.h   |    3 +
 2 files changed, 134 insertions(+), 5 deletions(-)

610d1ef7cf0e6a147bc190ebb2f55afabf764df4
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index cb4e2b8..0803231 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -44,6 +44,8 @@
 
 #include "libata.h"
 
+static void __ata_port_freeze(struct ata_port *ap);
+
 /**
  *	ata_scsi_timed_out - SCSI layer time out callback
  *	@cmd: timed out SCSI command
@@ -55,6 +57,8 @@
  *	from finishing it by setting EH_SCHEDULED and return
  *	EH_NOT_HANDLED.
  *
+ *	TODO: kill this function once old EH is gone.
+ *
  *	LOCKING:
  *	Called from timer context
  *
@@ -67,10 +71,16 @@ enum scsi_eh_timer_return ata_scsi_timed
 	struct ata_port *ap = ata_shost_to_port(host);
 	unsigned long flags;
 	struct ata_queued_cmd *qc;
-	enum scsi_eh_timer_return ret = EH_HANDLED;
+	enum scsi_eh_timer_return ret;
 
 	DPRINTK("ENTER\n");
 
+	if (ap->ops->error_handler) {
+		ret = EH_NOT_HANDLED;
+		goto out;
+	}
+
+	ret = EH_HANDLED;
 	spin_lock_irqsave(&ap->host_set->lock, flags);
 	qc = ata_qc_from_tag(ap, ap->active_tag);
 	if (qc) {
@@ -81,6 +91,7 @@ enum scsi_eh_timer_return ata_scsi_timed
 	}
 	spin_unlock_irqrestore(&ap->host_set->lock, flags);
 
+ out:
 	DPRINTK("EXIT, ret=%d\n", ret);
 	return ret;
 }
@@ -100,21 +111,132 @@ enum scsi_eh_timer_return ata_scsi_timed
 void ata_scsi_error(struct Scsi_Host *host)
 {
 	struct ata_port *ap = ata_shost_to_port(host);
+	spinlock_t *hs_lock = &ap->host_set->lock;
+	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
+	unsigned long flags;
 
 	DPRINTK("ENTER\n");
 
-	/* synchronize with IRQ handler and port task */
-	spin_unlock_wait(&ap->host_set->lock);
+	/* synchronize with port task */
 	ata_port_flush_task(ap);
 
-	WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
+	/* synchronize with host_set lock and sort out timeouts */
 
-	ap->ops->eng_timeout(ap);
+	/* For new EH, all qcs are finished in one of three ways -
+	 * normal completion, error completion, and SCSI timeout.
+	 * Both cmpletions can race against SCSI timeout.  When normal
+	 * completion wins, the qc never reaches EH.  When error
+	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
+	 *
+	 * When SCSI timeout wins, things are a bit more complex.
+	 * Normal or error completion can occur after the timeout but
+	 * before this point.  In such cases, both types of
+	 * completions are honored.  A scmd is determined to have
+	 * timed out iff its associated qc is active and not failed.
+	 */
+	if (ap->ops->error_handler) {
+		struct scsi_cmnd *scmd, *tmp;
+		int nr_timedout = 0;
+
+		spin_lock_irqsave(hs_lock, flags);
+
+		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
+			struct ata_queued_cmd *qc;
+
+			for (i = 0; i < ATA_MAX_QUEUE; i++) {
+				qc = __ata_qc_from_tag(ap, i);
+				if (qc->flags & ATA_QCFLAG_ACTIVE &&
+				    qc->scsicmd == scmd)
+					break;
+			}
+
+			if (i < ATA_MAX_QUEUE) {
+				/* the scmd has an associated qc */
+				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
+					/* which hasn't failed yet, timeout */
+					qc->err_mask |= AC_ERR_TIMEOUT;
+					qc->flags |= ATA_QCFLAG_FAILED;
+					nr_timedout++;
+				}
+			} else {
+				/* Normal completion occurred after
+				 * SCSI timeout but before this point.
+				 * Successfully complete it.
+				 */
+				scmd->retries = scmd->allowed;
+				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+			}
+		}
 
+		/* If we have timed out qcs.  They belong to EH from
+		 * this point but the state of the controller is
+		 * unknown.  Freeze the port to make sure the IRQ
+		 * handler doesn't diddle with those qcs.  This must
+		 * be done atomically w.r.t. setting QCFLAG_FAILED.
+		 */
+		if (nr_timedout)
+			__ata_port_freeze(ap);
+
+		spin_unlock_irqrestore(hs_lock, flags);
+	} else
+		spin_unlock_wait(hs_lock);
+
+ repeat:
+	/* invoke error handler */
+	if (ap->ops->error_handler) {
+		/* clear EH pending */
+		spin_lock_irqsave(hs_lock, flags);
+		ap->flags &= ~ATA_FLAG_EH_PENDING;
+		spin_unlock_irqrestore(hs_lock, flags);
+
+		/* invoke EH */
+		ap->ops->error_handler(ap);
+
+		/* Exception might have happend after ->error_handler
+		 * recovered the port but before this point.  Repeat
+		 * EH in such case.
+		 */
+		spin_lock_irqsave(hs_lock, flags);
+
+		if (ap->flags & ATA_FLAG_EH_PENDING) {
+			if (--repeat_cnt) {
+				ata_port_printk(ap, KERN_INFO,
+					"EH pending after completion, "
+					"repeating EH (cnt=%d)\n", repeat_cnt);
+				spin_unlock_irqrestore(hs_lock, flags);
+				goto repeat;
+			}
+			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
+					"tries, giving up\n", ATA_EH_MAX_REPEAT);
+		}
+
+		/* Clear host_eh_scheduled while holding hs_lock such
+		 * that if exception occurs after this point but
+		 * before EH completion, SCSI midlayer will
+		 * re-initiate EH.
+		 */
+		host->host_eh_scheduled = 0;
+
+		spin_unlock_irqrestore(hs_lock, flags);
+	} else {
+		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
+		ap->ops->eng_timeout(ap);
+	}
+
+	/* finish or retry handled scmd's and clean up */
 	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
 
 	scsi_eh_flush_done_q(&ap->eh_done_q);
 
+	/* clean up */
+	spin_lock_irqsave(hs_lock, flags);
+
+	if (ap->flags & ATA_FLAG_RECOVERED)
+		ata_port_printk(ap, KERN_INFO, "EH complete\n");
+	ap->flags &= ~ATA_FLAG_RECOVERED;
+
+	spin_unlock_irqrestore(hs_lock, flags);
+
 	DPRINTK("EXIT\n");
 }
 
@@ -133,6 +255,8 @@ void ata_scsi_error(struct Scsi_Host *ho
  *	an interrupt was not delivered to the driver, even though the
  *	transaction completed successfully.
  *
+ *	TODO: kill this function once old EH is gone.
+ *
  *	LOCKING:
  *	Inherited from SCSI layer (none, can sleep)
  */
@@ -198,6 +322,8 @@ static void ata_qc_timeout(struct ata_qu
  *	an interrupt was not delivered to the driver, even though the
  *	transaction completed successfully.
  *
+ *	TODO: kill this function once old EH is gone.
+ *
  *	LOCKING:
  *	Inherited from SCSI layer (none, can sleep)
  */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3f22eff..e4353cf 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -225,6 +225,9 @@ enum {
 	ATA_PORT_PRIMARY	= (1 << 0),
 	ATA_PORT_SECONDARY	= (1 << 1),
 
+	/* max repeat if error condition is still set after ->error_handler */
+	ATA_EH_MAX_REPEAT	= 5,
+
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
 };
-- 
1.2.4



  parent reply	other threads:[~2006-05-11 12:27 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-05-11 12:27 [PATCHSET 02/11] new EH framework, take 3 Tejun Heo
2006-05-11 12:27 ` [PATCH 02/11] libata-eh-fw: clear SError in ata_std_postreset() Tejun Heo
2006-05-11 12:27 ` [PATCH 03/11] libata-eh-fw: use special reserved tag and qc for internal commands Tejun Heo
2006-05-11 12:27 ` [PATCH 04/11] libata-eh-fw: update ata_qc_from_tag() to enforce normal/EH qc ownership Tejun Heo
2006-05-11 12:27 ` [PATCH 01/11] libata-eh-fw: add flags and operations for new EH Tejun Heo
2006-05-11 12:27 ` [PATCH 11/11] libata-eh-fw: update SCSI command completion path " Tejun Heo
2006-05-11 12:27 ` [PATCH 08/11] libata-eh-fw: implement new EH scheduling from PIO Tejun Heo
2006-05-18 10:42   ` Albert Lee
2006-05-18 11:49     ` Tejun Heo
2006-05-19  7:31       ` Albert Lee
2006-05-11 12:27 ` [PATCH 05/11] libata-eh-fw: implement new EH scheduling via error completion Tejun Heo
2006-05-11 12:27 ` [PATCH 10/11] libata-eh-fw: update ata_exec_internal() for new EH Tejun Heo
2006-05-11 12:27 ` [PATCH 07/11] libata-eh-fw: implement freeze/thaw Tejun Heo
2006-05-16 10:15   ` Albert Lee
2006-05-16 10:30     ` Tejun Heo
2006-05-16 10:43       ` Albert Lee
2006-05-16 11:17       ` Albert Lee
2006-05-11 12:27 ` Tejun Heo [this message]
2006-05-11 12:27 ` [PATCH 06/11] libata-eh-fw: implement ata_port_schedule_eh() and ata_port_abort() Tejun Heo
2006-05-13 22:01 ` [PATCHSET 02/11] new EH framework, take 3 Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11473504443148-git-send-email-htejun@gmail.com \
    --to=htejun@gmail.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=albertcc@tw.ibm.com \
    --cc=axboe@suse.de \
    --cc=efalk@google.com \
    --cc=forrest.zhao@intel.com \
    --cc=jgarzik@pobox.com \
    --cc=linux-ide@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.