All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org
Subject: [PATCH md 014 of 14] Fix possible problem in raid1/raid10 error overwriting.
Date: Thu, 1 Dec 2005 14:24:02 +1100	[thread overview]
Message-ID: <1051201032402.29735@suse.de> (raw)
In-Reply-To: 20051201141508.29384.patches@notabene


The code to overwrite/reread for addressing read errors
in raid1/raid10 currently assumes that the re-read will
not alter the buffer, which is then used to write to
the next device.  This is not a safe assumption to make.

So we split the loops into an overwrite loop and a separate re-read
loop, so that the writing is complete before reading is attempted.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/raid1.c  |   38 ++++++++++++++++++++++++++++++--------
 ./drivers/md/raid10.c |   22 ++++++++++++++++++----
 2 files changed, 48 insertions(+), 12 deletions(-)

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2005-12-01 14:03:25.000000000 +1100
+++ ./drivers/md/raid1.c	2005-12-01 14:03:40.000000000 +1100
@@ -1252,6 +1252,7 @@ static void sync_request_write(mddev_t *
 			} while (!success && d != r1_bio->read_disk);
 
 			if (success) {
+				int start = d;
 				/* write it back and re-read */
 				set_bit(R1BIO_Uptodate, &r1_bio->state);
 				while (d != r1_bio->read_disk) {
@@ -1265,14 +1266,23 @@ static void sync_request_write(mddev_t *
 							 sect + rdev->data_offset,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
-							 WRITE) == 0 ||
-					    sync_page_io(rdev->bdev,
+							 WRITE) == 0)
+						md_error(mddev, rdev);
+				}
+				d = start;
+				while (d != r1_bio->read_disk) {
+					if (d == 0)
+						d = conf->raid_disks;
+					d--;
+					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+						continue;
+					rdev = conf->mirrors[d].rdev;
+					if (sync_page_io(rdev->bdev,
 							 sect + rdev->data_offset,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
-							 READ) == 0) {
+							 READ) == 0)
 						md_error(mddev, rdev);
-					}
 				}
 			} else {
 				char b[BDEVNAME_SIZE];
@@ -1444,6 +1454,7 @@ static void raid1d(mddev_t *mddev)
 
 				if (success) {
 					/* write it back and re-read */
+					int start = d;
 					while (d != r1_bio->read_disk) {
 						if (d==0)
 							d = conf->raid_disks;
@@ -1453,13 +1464,24 @@ static void raid1d(mddev_t *mddev)
 						    test_bit(In_sync, &rdev->flags)) {
 							if (sync_page_io(rdev->bdev,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, WRITE) == 0 ||
-							    sync_page_io(rdev->bdev,
+									 s<<9, conf->tmppage, WRITE) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+					d = start;
+					while (d != r1_bio->read_disk) {
+						if (d==0)
+							d = conf->raid_disks;
+						d--;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, READ) == 0) {
+									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
-							}
 						}
 					}
 				} else {

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2005-12-01 14:03:25.000000000 +1100
+++ ./drivers/md/raid10.c	2005-12-01 14:03:41.000000000 +1100
@@ -1421,6 +1421,7 @@ static void raid10d(mddev_t *mddev)
 				} while (!success && sl != r10_bio->read_slot);
 
 				if (success) {
+					int start = sl;
 					/* write it back and re-read */
 					while (sl != r10_bio->read_slot) {
 						int d;
@@ -1434,14 +1435,27 @@ static void raid10d(mddev_t *mddev)
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, WRITE) == 0 ||
-							    sync_page_io(rdev->bdev,
+									 s<<9, conf->tmppage, WRITE) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+					sl = start;
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, READ) == 0) {
+									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
-							}
 						}
 					}
 				} else {

      parent reply	other threads:[~2005-12-01  3:24 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-01  3:22 [PATCH md 000 of 14] Introduction NeilBrown
2005-12-01  3:22 ` [PATCH md 001 of 14] Support check-without-repair of raid10 arrays NeilBrown
2005-12-01  3:22 ` [PATCH md 002 of 14] Allow raid1 to check consistency NeilBrown
2005-12-01 22:34   ` Andrew Morton
2005-12-05 23:30     ` Neil Brown
2005-12-06  3:50       ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 003 of 14] Make sure read error on last working drive of raid1 actually returns failure NeilBrown
2005-12-01  3:23 ` [PATCH md 004 of 14] auto-correct correctable read errors in raid10 NeilBrown
2005-12-01  3:23 ` [PATCH md 005 of 14] raid10 read-error handling - resync and read-only NeilBrown
2005-12-01  3:23 ` [PATCH md 006 of 14] Make /proc/mdstat pollable NeilBrown
2005-12-01 22:39   ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 007 of 14] Clean up 'page' related names in md NeilBrown
2005-12-01  3:23 ` [PATCH md 008 of 14] Convert md to use kzalloc throughout NeilBrown
2005-12-01 22:42   ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 009 of 14] Tidy up raid5/6 hash table code NeilBrown
2005-12-01  3:23 ` [PATCH md 010 of 14] Convert various kmap calls to kmap_atomic NeilBrown
2005-12-01 22:46   ` Andrew Morton
2005-12-05 23:43     ` Neil Brown
2005-12-01  3:23 ` [PATCH md 011 of 14] Convert recently exported symbol to GPL NeilBrown
2005-12-01  3:23 ` [PATCH md 012 of 14] Break out of a loop that doesn't need to run to completion NeilBrown
2005-12-01  3:23 ` [PATCH md 013 of 14] Remove personality numbering from md NeilBrown
2005-12-01  3:24 ` NeilBrown [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1051201032402.29735@suse.de \
    --to=neilb@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.