From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org
Subject: [PATCH md ] Better handling of readerrors with raid5.
Date: Fri, 16 Sep 2005 13:01:57 +1000 [thread overview]
Message-ID: <1050916030157.11071@suse.de> (raw)
In-Reply-To: 20050916125754.11044.patches@notabene
TESTERS WANTED!! SEE BELOW...
This patch changes the behaviour of raid5 when it gets a read error.
Instead of just failing the device, it tried to find out what should
have been there, and writes it over the bad block. For some
media-errors, this has a reasonable chance of fixing the error.
If the write succeeds, and a subsequent read succeeds as well, raid5
decided the address is OK and conitnues.
I have tested this using the 'faulty' md personality, but it would be
really good to test it with real disks that have real errors. If
anyone has such drives in a cupboard (or even in a computer) and would
be willing to give this a try, I would really appreciate it.
Meanwhile, I think it is OK to go into -mm, but certainly not to go to
Linus yet.
Thanks,
NeilBrown
### Comments for Changeset
Instead of failing a drive on read-error, we attempt to
re-write the block, and then re-read. If that all works,
we allow the device to remain in the array.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 61 +++++++++++++++++++++++++++++++++++++++----
./include/linux/raid/raid5.h | 2 +
2 files changed, 58 insertions(+), 5 deletions(-)
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2005-09-16 12:21:24.000000000 +1000
+++ ./drivers/md/raid5.c 2005-09-16 12:57:12.000000000 +1000
@@ -349,7 +349,7 @@ static void shrink_stripes(raid5_conf_t
conf->slab_cache = NULL;
}
-static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done,
+static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
int error)
{
struct stripe_head *sh = bi->bi_private;
@@ -401,10 +401,27 @@ static int raid5_end_read_request (struc
}
#else
set_bit(R5_UPTODATE, &sh->dev[i].flags);
-#endif
+#endif
+ if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+ printk("R5: read error corrected!!\n");
+ clear_bit(R5_ReadError, &sh->dev[i].flags);
+ clear_bit(R5_ReWrite, &sh->dev[i].flags);
+ }
} else {
- md_error(conf->mddev, conf->disks[i].rdev);
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+ if (conf->mddev->degraded) {
+ printk("R5: read error not correctable.\n");
+ clear_bit(R5_ReadError, &sh->dev[i].flags);
+ clear_bit(R5_ReWrite, &sh->dev[i].flags);
+ md_error(conf->mddev, conf->disks[i].rdev);
+ } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
+ /* Oh, no!!! */
+ printk("R5: read error NOT corrected!!\n");
+ clear_bit(R5_ReadError, &sh->dev[i].flags);
+ clear_bit(R5_ReWrite, &sh->dev[i].flags);
+ md_error(conf->mddev, conf->disks[i].rdev);
+ } else
+ set_bit(R5_ReadError, &sh->dev[i].flags);
}
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
#if 0
@@ -966,6 +983,12 @@ static void handle_stripe(struct stripe_
if (dev->written) written++;
rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
if (!rdev || !rdev->in_sync) {
+ /* The ReadError flag wil just be confusing now */
+ clear_bit(R5_ReadError, &dev->flags);
+ clear_bit(R5_ReWrite, &dev->flags);
+ }
+ if (!rdev || !rdev->in_sync
+ || test_bit(R5_ReadError, &dev->flags)) {
failed++;
failed_num = i;
} else
@@ -980,6 +1003,14 @@ static void handle_stripe(struct stripe_
if (failed > 1 && to_read+to_write+written) {
for (i=disks; i--; ) {
int bitmap_end = 0;
+
+ if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+ mdk_rdev_t *rdev = conf->disks[i].rdev;
+ if (rdev && rdev->in_sync)
+ /* multiple read failures in one stripe */
+ md_error(conf->mddev, rdev);
+ }
+
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
bi = sh->dev[i].towrite;
@@ -1015,7 +1046,8 @@ static void handle_stripe(struct stripe_
}
/* fail any reads if this device is non-operational */
- if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
+ if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+ test_bit(R5_ReadError, &sh->dev[i].flags)) {
bi = sh->dev[i].toread;
sh->dev[i].toread = NULL;
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
@@ -1274,7 +1306,26 @@ static void handle_stripe(struct stripe_
md_done_sync(conf->mddev, STRIPE_SECTORS,1);
clear_bit(STRIPE_SYNCING, &sh->state);
}
-
+
+ /* If the failed drive is just a ReadError, then we might need to progress
+ * the repair/check process
+ */
+ if (failed == 1 && test_bit(R5_ReadError, &sh->dev[failed_num].flags)
+ && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
+ && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
+ ) {
+ dev = &sh->dev[failed_num];
+ if (!test_bit(R5_ReWrite, &dev->flags)) {
+ set_bit(R5_Wantwrite, &dev->flags);
+ set_bit(R5_ReWrite, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ } else {
+ /* let's read it back */
+ set_bit(R5_Wantread, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ }
+ }
+
spin_unlock(&sh->lock);
while ((bi=return_bi)) {
diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~ 2005-09-16 12:21:24.000000000 +1000
+++ ./include/linux/raid/raid5.h 2005-09-16 12:55:51.000000000 +1000
@@ -154,6 +154,8 @@ struct stripe_head {
#define R5_Wantwrite 5
#define R5_Syncio 6 /* this io need to be accounted as resync io */
#define R5_Overlap 7 /* There is a pending overlapping request on this block */
+#define R5_ReadError 8 /* seen a read error here recently */
+#define R5_ReWrite 9 /* have tried to over-write the readerror */
/*
* Write method
next parent reply other threads:[~2005-09-16 3:01 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20050916125754.11044.patches@notabene>
2005-09-16 3:01 ` NeilBrown [this message]
2005-09-16 16:53 ` [PATCH md ] Better handling of readerrors with raid5 Mike Hardy
2005-09-16 21:39 ` Neil Brown
2005-09-18 22:06 ` JaniD++
2005-09-21 0:15 ` Molle Bestefich
2005-09-21 9:14 ` Neil Brown
2005-09-21 15:07 ` Al Boldi
2005-10-23 3:57 ` Patrik Jonsson
2005-10-23 22:52 ` Neil Brown
2005-10-11 14:31 ` Mattias Wadenstein
2005-12-22 22:23 ` Stephan van Hienen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1050916030157.11071@suse.de \
--to=neilb@suse.de \
--cc=akpm@osdl.org \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.