linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: Niel Lambrechts <niel.lambrechts@gmail.com>
Cc: "linux.kernel" <linux-kernel@vger.kernel.org>
Subject: Re: 2.6.29 regression: ATA bus errors on resume
Date: Mon, 06 Apr 2009 12:39:47 -0700	[thread overview]
Message-ID: <49DA5A83.2070002@kernel.org> (raw)
In-Reply-To: <49DA489E.1030801@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1254 bytes --]

Hello,

Niel Lambrechts wrote:
> On 04/06/2009 12:09 PM, Tejun Heo wrote:
>>> Will the fix naturally make its way into the mainline kernel, or is
>>> there any extra debugging/testing I can help with?
>>>     
>> Well, the problem is the debug patch doesn't actually do anything
>> other than printing out messages.  It could be that the problem is
>> timing dependent (which is likely anyway).  You can still reproduce
>> the problem with the patch, right?
>>   
> Heh? You provided two patches, with the last one you said:

Yeah, the second one actually only added printks to see whether that's
the case.  No behavior change.

>> Strange.  Maybe IO commands are getting through while the sdev is
>> still in quiesce state?  Can you please repeat the test with the
>> attached patch?
>
> With the latter, I have not encountered the original problem i.e. any
> severe EXT4 corruption again, not in 2.6.29 and not in 2.6.29.1.

Eh... so, we're definitely seeing something which is dependent on
timing.

> Do I also need to try the last patch without any debugging messages?

Then there will be nothing left.  :-) Can you please try the attached
patch?  It's still only debug messages but lighter; hopefully, it
won't mask the problem.

Thanks.

-- 
tejun

[-- Attachment #2: libata-eh-debug-2.patch --]
[-- Type: text/x-patch, Size: 4377 bytes --]

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 0183131..2782bad 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1274,7 +1274,7 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
 {
 	struct ata_port *ap = link->ap;
 	struct ata_eh_info *ehi = &link->eh_info;
-	struct ata_eh_context *ehc = &link->eh_context;
+	//struct ata_eh_context *ehc = &link->eh_context;
 	unsigned long flags;
 
 	spin_lock_irqsave(ap->lock, flags);
@@ -1284,7 +1284,7 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
 	/* About to take EH action, set RECOVERED.  Ignore actions on
 	 * slave links as master will do them again.
 	 */
-	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
+	if (/*!(ehc->i.flags & ATA_EHI_QUIET) && */link != ap->slave_link)
 		ap->pflags |= ATA_PFLAG_RECOVERED;
 
 	spin_unlock_irqrestore(ap->lock, flags);
@@ -2017,8 +2017,13 @@ static void ata_eh_link_autopsy(struct ata_link *link)
 
 		/* determine whether the command is worth retrying */
 		if (!(qc->err_mask & AC_ERR_INVALID) &&
-		    ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
+		    ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) {
+			/*ata_dev_printk(qc->dev, KERN_INFO,
+			  "XXX setting retry on qc%d\n", tag);*/
 			qc->flags |= ATA_QCFLAG_RETRY;
+		} else
+			/*ata_dev_printk(qc->dev, KERN_INFO,
+			  "XXX no retry for qc%d\n", tag)*/;
 
 		/* accumulate error info */
 		ehc->i.dev = qc->dev;
@@ -2126,8 +2131,8 @@ static void ata_eh_link_report(struct ata_link *link)
 	char tries_buf[6];
 	int tag, nr_failed = 0;
 
-	if (ehc->i.flags & ATA_EHI_QUIET)
-		return;
+	/*if (ehc->i.flags & ATA_EHI_QUIET)
+		return;*/
 
 	desc = NULL;
 	if (ehc->i.desc[0] != '\0')
@@ -2147,8 +2152,8 @@ static void ata_eh_link_report(struct ata_link *link)
 		nr_failed++;
 	}
 
-	if (!nr_failed && !ehc->i.err_mask)
-		return;
+	/*if (!nr_failed && !ehc->i.err_mask)
+		return;*/
 
 	frozen = "";
 	if (ap->pflags & ATA_PFLAG_FROZEN)
@@ -3350,16 +3355,23 @@ void ata_eh_finish(struct ata_port *ap)
 			 * generate sense data in this function,
 			 * considering both err_mask and tf.
 			 */
-			if (qc->flags & ATA_QCFLAG_RETRY)
+			if (qc->flags & ATA_QCFLAG_RETRY) {
+				/*ata_dev_printk(qc->dev, KERN_INFO, "XXX retrying qc%d, retries=%d allowed=%d\n",
+				  tag, qc->scsicmd->retries, qc->scsicmd->allowed);*/
 				ata_eh_qc_retry(qc);
-			else
+			} else {
+				/*ata_dev_printk(qc->dev, KERN_INFO, "XXX terminating qc%d\n", tag);*/
 				ata_eh_qc_complete(qc);
+			}
 		} else {
 			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
+				/*ata_dev_printk(qc->dev, KERN_INFO, "XXX terminating qc%d (SENSE), retries=%d\n",
+				  tag, qc->scsicmd->retries);*/
 				ata_eh_qc_complete(qc);
 			} else {
 				/* feed zero TF to sense generation */
 				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
+				/*ata_dev_printk(qc->dev, KERN_INFO, "XXX retrying qc%d (bogus SENSE)\n", tag);*/
 				ata_eh_qc_retry(qc);
 			}
 		}
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 0c2c73b..608bacd 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1569,13 +1569,16 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
 		list_del_init(&scmd->eh_entry);
+		printk("XXX scsi_eh_flush_done_q: online=%d(%d) noretry=%d retries=%d allowed=%d\n",
+		       scsi_device_online(scmd->device), scmd->device->sdev_state,
+		       scsi_noretry_cmd(scmd), scmd->retries, scmd->allowed);
 		if (scsi_device_online(scmd->device) &&
 		    !scsi_noretry_cmd(scmd) &&
 		    (++scmd->retries <= scmd->allowed)) {
-			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
+			/*SCSI_LOG_ERROR_RECOVERY(3, */printk("%s: flush"
 							  " retry cmd: %p\n",
 							  current->comm,
-							  scmd));
+							      scmd)/*)*/;
 				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
 		} else {
 			/*
@@ -1585,9 +1588,9 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 			 */
 			if (!scmd->result)
 				scmd->result |= (DRIVER_TIMEOUT << 24);
-			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
+			/*SCSI_LOG_ERROR_RECOVERY(3,*/ printk("%s: flush finish"
 							" cmd: %p\n",
-							current->comm, scmd));
+							      current->comm, scmd)/*)*/;
 			scsi_finish_command(scmd);
 		}
 	}

  reply	other threads:[~2009-04-06 19:39 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <ckpL0-3TE-3@gated-at.bofh.it>
     [not found] ` <ckpL0-3TE-5@gated-at.bofh.it>
     [not found]   ` <ckpL0-3TE-7@gated-at.bofh.it>
     [not found]     ` <ckpL0-3TE-9@gated-at.bofh.it>
     [not found]       ` <ckpL0-3TE-11@gated-at.bofh.it>
     [not found]         ` <ckpL0-3TE-1@gated-at.bofh.it>
     [not found]           ` <cllvN-2Gf-1@gated-at.bofh.it>
2009-03-30 14:30             ` 2.6.29 regression: ATA bus errors on resume Niel Lambrechts
2009-03-30 14:40               ` Jeff Garzik
2009-04-01 19:48                 ` Niel Lambrechts
2009-04-03 20:09                   ` Jeff Garzik
2009-04-03 20:54                     ` Niel Lambrechts
2009-04-02  1:50               ` Tejun Heo
2009-04-02  6:20                 ` Niel Lambrechts
2009-04-02  6:52                   ` Tejun Heo
2009-04-02 11:03                     ` Niel Lambrechts
2009-04-02 14:15                       ` Niel Lambrechts
2009-04-04  4:54                         ` Tejun Heo
2009-04-06  5:01                           ` Niel Lambrechts
2009-04-06 10:09                             ` Tejun Heo
2009-04-06 18:23                               ` Niel Lambrechts
2009-04-06 19:39                                 ` Tejun Heo [this message]
2009-04-06 21:26                                   ` Niel Lambrechts
2009-04-09 18:18                                     ` Tejun Heo
2009-05-23  9:17                                       ` Niel Lambrechts
2009-05-23 10:26                                         ` 2.6.29 regression: ATA bus errors on resume (output with debug patch) Niel Lambrechts
2009-05-25  0:32                                           ` Tejun Heo
     [not found] <clqON-2Xv-7@gated-at.bofh.it>
     [not found] ` <clqON-2Xv-9@gated-at.bofh.it>
     [not found]   ` <clqON-2Xv-11@gated-at.bofh.it>
     [not found]     ` <clqON-2Xv-13@gated-at.bofh.it>
     [not found]       ` <clqON-2Xv-15@gated-at.bofh.it>
     [not found]         ` <clqON-2Xv-17@gated-at.bofh.it>
     [not found]           ` <clqON-2Xv-19@gated-at.bofh.it>
     [not found]             ` <clqON-2Xv-5@gated-at.bofh.it>
     [not found]               ` <clqYt-3bu-5@gated-at.bofh.it>
2009-03-30 18:24                 ` 2.6.29 regression: ATA bus errors on resume Niel Lambrechts
2009-03-30 19:17                   ` Jeff Garzik
     [not found]               ` <cmknZ-8lW-9@gated-at.bofh.it>
     [not found]                 ` <cmoBl-6Ok-21@gated-at.bofh.it>
     [not found]                   ` <cmp4n-7rb-15@gated-at.bofh.it>
     [not found]                     ` <cmsYg-5BR-27@gated-at.bofh.it>
     [not found]                       ` <cmvW7-1Yj-23@gated-at.bofh.it>
     [not found]                         ` <cnheh-3vO-7@gated-at.bofh.it>
     [not found]                           ` <cnPg1-7Q4-19@gated-at.bofh.it>
     [not found]                             ` <cnTWo-7bV-25@gated-at.bofh.it>
     [not found]                               ` <co1Kd-350-5@gated-at.bofh.it>
     [not found]                                 ` <co2Qf-4QQ-27@gated-at.bofh.it>
     [not found]                                   ` <co4yj-7Mc-5@gated-at.bofh.it>
     [not found]                                     ` <cp71c-4py-29@gated-at.bofh.it>
     [not found]                                       ` <cEVyE-re-1@gated-at.bofh.it>
2009-05-23  9:36                                         ` Niel Lambrechts
2009-05-25  1:10                                           ` Tejun Heo
2009-05-25  8:15                                             ` Alan Cox
2009-05-25 22:06                                               ` Niel Lambrechts
2009-05-26  4:58                                                 ` Tejun Heo
2009-05-26  5:43                                                   ` Niel Lambrechts
2009-05-26  5:50                                                     ` Tejun Heo
2009-05-26  6:13                                                       ` Niel Lambrechts
2009-05-26 13:33                                                         ` Tejun Heo
2009-05-26 18:14                                                           ` Niel Lambrechts
2009-05-27  0:07                                                             ` Tejun Heo
2009-05-27 14:01                                                               ` Niel Lambrechts
2009-06-01 18:57                                                                 ` Niel Lambrechts
2009-06-03  3:14                                                                   ` Tejun Heo
2009-06-03  4:28                                                                     ` Tejun Heo
2009-06-06  7:05                                                                       ` Niel Lambrechts
2009-06-19 15:04                                                                         ` Pavel Machek
2009-06-25 12:57                                                                         ` Tejun Heo
2009-06-25 15:25                                                                           ` Niel Lambrechts
2009-06-26  0:46                                                                             ` Tejun Heo
2009-06-26  6:24                                                                               ` Niel Lambrechts
2009-09-18 20:26                                                                                 ` Berthold Gunreben
2009-09-25  4:11                                                                                   ` Tejun Heo
2009-09-30  9:58                                                                                     ` Berthold Gunreben
2009-09-30 10:26                                                                                       ` Tejun Heo
2009-05-26  4:58                                               ` Tejun Heo
     [not found] <cjtH6-3Ll-13@gated-at.bofh.it>
     [not found] ` <cjtH6-3Ll-15@gated-at.bofh.it>
     [not found]   ` <cjtH6-3Ll-11@gated-at.bofh.it>
     [not found]     ` <cjutt-577-11@gated-at.bofh.it>
     [not found]       ` <cjJCb-47c-23@gated-at.bofh.it>
2009-03-27 19:10         ` Niel Lambrechts
2009-03-27 22:30           ` Arjan van de Ven
2009-03-28 10:22             ` Niel Lambrechts
2009-03-28 14:06               ` Rafael J. Wysocki
2009-03-30  8:43                 ` Tejun Heo
2009-03-30  8:55           ` Tejun Heo
     [not found] <cjlqb-7sp-1@gated-at.bofh.it>
     [not found] ` <cjq6y-6sq-11@gated-at.bofh.it>
2009-03-25  5:19   ` 2.6.29 regression: ATA bus errors on resume (was: EXT4: __ext4_get_inode_loc errors after s2disk) Niel Lambrechts
2009-03-25  6:06     ` 2.6.29 regression: ATA bus errors on resume Jeff Garzik
2009-03-25 21:40       ` Niel Lambrechts
2009-03-25 22:16       ` James Bottomley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49DA5A83.2070002@kernel.org \
    --to=tj@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=niel.lambrechts@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).