[PATCH md 005 of 14] raid10 read-error handling - resync and read-only

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org
Subject: [PATCH md 005 of 14] raid10 read-error handling - resync and read-only
Date: Thu, 1 Dec 2005 14:23:11 +1100	[thread overview]
Message-ID: <1051201032311.29605@suse.de> (raw)
In-Reply-To: 20051201141508.29384.patches@notabene


Add in correct read-error handling for resync and read-only
situations.
When read-only, we don't over-write, so we need to mark the failed
drive in the r10_bio so we don't re-try it.
During resync, we always read all blocks, so if there is a read error,
we simply over-write it with the good block that we found (assuming
we found one).

Note that the recovery case still isn't handled in an interesting way.
There is nothing useful to do for the 2-copies case.  If there are 3
or more copies, then we could try reading from one of the non-missing
copies, but this is a bit complicated and very rarely would be used,
so I'm leaving it for now.

Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/raid10.c         |   56 ++++++++++++++++++++++++++----------------
 ./include/linux/raid/raid10.h |    7 +++++
 2 files changed, 42 insertions(+), 21 deletions(-)

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2005-11-29 14:07:05.000000000 +1100
+++ ./drivers/md/raid10.c	2005-11-29 14:11:01.000000000 +1100
@@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio)
+		if (*bio && *bio != IO_BLOCKED)
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r1
 		disk = r10_bio->devs[slot].devnum;
 
 		while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+		       r10_bio->devs[slot].bio == IO_BLOCKED ||
 		       !test_bit(In_sync, &rdev->flags)) {
 			slot++;
 			if (slot == conf->copies) {
@@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r1
 	slot = 0;
 	disk = r10_bio->devs[slot].devnum;
 	while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+	       r10_bio->devs[slot].bio == IO_BLOCKED ||
 	       !test_bit(In_sync, &rdev->flags)) {
 		slot ++;
 		if (slot == conf->copies) {
@@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r1
 
 
 		if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+		    r10_bio->devs[nslot].bio == IO_BLOCKED ||
 		    !test_bit(In_sync, &rdev->flags))
 			continue;
 
@@ -1104,7 +1107,6 @@ abort:
 
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
-	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 	int i,d;
@@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio
 		BUG();
 	update_head_pos(i, r10_bio);
 	d = r10_bio->devs[i].devnum;
-	if (!uptodate)
+
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_bit(R10BIO_Uptodate, &r10_bio->state);
+	else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
 		md_error(r10_bio->mddev,
 			 conf->mirrors[d].rdev);
 
@@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *
 	fbio = r10_bio->devs[i].bio;
 
 	/* now find blocks with errors */
-	for (i=first+1 ; i < conf->copies ; i++) {
-		int vcnt, j, d;
+	for (i=0 ; i < conf->copies ; i++) {
+		int  j, d;
+		int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
 
-		if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
-			continue;
-		/* We know that the bi_io_vec layout is the same for
-		 * both 'first' and 'i', so we just compare them.
-		 * All vec entries are PAGE_SIZE;
-		 */
 		tbio = r10_bio->devs[i].bio;
-		vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
-		for (j = 0; j < vcnt; j++)
-			if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
-				   page_address(tbio->bi_io_vec[j].bv_page),
-				   PAGE_SIZE))
-				break;
-		if (j == vcnt)
+
+		if (tbio->bi_end_io != end_sync_read)
+			continue;
+		if (i == first)
 			continue;
-		mddev->resync_mismatches += r10_bio->sectors;
+		if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+			/* We know that the bi_io_vec layout is the same for
+			 * both 'first' and 'i', so we just compare them.
+			 * All vec entries are PAGE_SIZE;
+			 */
+			for (j = 0; j < vcnt; j++)
+				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+					   page_address(tbio->bi_io_vec[j].bv_page),
+					   PAGE_SIZE))
+					break;
+			if (j == vcnt)
+				continue;
+			mddev->resync_mismatches += r10_bio->sectors;
+		}
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
 			/* Don't fix anything. */
 			continue;
@@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev
 
 	atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 	md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-	generic_make_request(wbio);
+	if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+		generic_make_request(wbio);
+	else
+		bio_endio(wbio, wbio->bi_size, -EIO);
 }
 
 
@@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev)
 			unfreeze_array(conf);
 
 			bio = r10_bio->devs[r10_bio->read_slot].bio;
-			r10_bio->devs[r10_bio->read_slot].bio = NULL;
+			r10_bio->devs[r10_bio->read_slot].bio =
+				mddev->ro ? IO_BLOCKED : NULL;
 			bio_put(bio);
 			mirror = read_balance(conf, r10_bio);
 			if (mirror == -1) {

diff ./include/linux/raid/raid10.h~current~ ./include/linux/raid/raid10.h
--- ./include/linux/raid/raid10.h~current~	2005-11-29 14:07:05.000000000 +1100
+++ ./include/linux/raid/raid10.h	2005-11-29 12:09:11.000000000 +1100
@@ -104,6 +104,13 @@ struct r10bio_s {
 	} devs[0];
 };
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error.  To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
 /* bits for r10bio.state */
 #define	R10BIO_Uptodate	0
 #define	R10BIO_IsSync	1

next prev parent reply	other threads:[~2005-12-01  3:23 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-01  3:22 [PATCH md 000 of 14] Introduction NeilBrown
2005-12-01  3:22 ` [PATCH md 001 of 14] Support check-without-repair of raid10 arrays NeilBrown
2005-12-01  3:22 ` [PATCH md 002 of 14] Allow raid1 to check consistency NeilBrown
2005-12-01 22:34   ` Andrew Morton
2005-12-05 23:30     ` Neil Brown
2005-12-06  3:50       ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 003 of 14] Make sure read error on last working drive of raid1 actually returns failure NeilBrown
2005-12-01  3:23 ` [PATCH md 004 of 14] auto-correct correctable read errors in raid10 NeilBrown
2005-12-01  3:23 ` NeilBrown [this message]
2005-12-01  3:23 ` [PATCH md 006 of 14] Make /proc/mdstat pollable NeilBrown
2005-12-01 22:39   ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 007 of 14] Clean up 'page' related names in md NeilBrown
2005-12-01  3:23 ` [PATCH md 008 of 14] Convert md to use kzalloc throughout NeilBrown
2005-12-01 22:42   ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 009 of 14] Tidy up raid5/6 hash table code NeilBrown
2005-12-01  3:23 ` [PATCH md 010 of 14] Convert various kmap calls to kmap_atomic NeilBrown
2005-12-01 22:46   ` Andrew Morton
2005-12-05 23:43     ` Neil Brown
2005-12-01  3:23 ` [PATCH md 011 of 14] Convert recently exported symbol to GPL NeilBrown
2005-12-01  3:23 ` [PATCH md 012 of 14] Break out of a loop that doesn't need to run to completion NeilBrown
2005-12-01  3:23 ` [PATCH md 013 of 14] Remove personality numbering from md NeilBrown
2005-12-01  3:24 ` [PATCH md 014 of 14] Fix possible problem in raid1/raid10 error overwriting NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1051201032311.29605@suse.de \
    --to=neilb@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).