From: "majianpeng" <majianpeng@gmail.com>
To: NeilBrown <neilb@suse.de>
Cc: linux-raid <linux-raid@vger.kernel.org>
Subject: Re: Re: md/raid5:Fix recover/replace stop if handle stipe failed
Date: Tue, 27 Mar 2012 15:33:25 +0800 [thread overview]
Message-ID: <201203271533149067900@gmail.com> (raw)
In-Reply-To: 20120327142642.3e40547d@notabene.brown
I tested and corrected some bug.
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 23ac880..f8480f8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,39 +2471,42 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0;
int i;
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0;
s->replacing = 0;
/* There is nothing more to do for sync/check/repair.
+ * Don't even need to abort as that is handled elsewhere
+ * if needed, and not always wanted e.g. if there is a known
+ * bad block here.
* For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery
*/
- if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
- return;
- /* During recovery devices cannot be removed, so locking and
- * refcounting of rdevs is not needed
- */
- for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = conf->disks[i].rdev;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- rdev = conf->disks[i].replacement;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- }
- if (abort) {
- conf->recovery_disabled = conf->mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
- }
+ if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+ /* During recovery devices cannot be removed, so
+ * locking and refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ struct md_rdev *rdev = conf->disks[i].rdev;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ rdev = conf->disks[i].replacement;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort)
+ conf->recovery_disabled =
+ conf->mddev->recovery_disabled;
+ } else
+ abort = 1;
+ md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
}
static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3206,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */;
else if (is_bad) {
/* also not in-sync */
- if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+ test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error
* which we can now try to correct
*/
------------------
majianpeng
2012-03-27
-------------------------------------------------------------
发件人:NeilBrown
发送日期:2012-03-27 11:26:50
收件人:majianpeng
抄送:linux-raid
主题:Re: md/raid5:Fix recover/replace stop if handle stipe failed
On Wed, 14 Mar 2012 17:27:44 +0800 "majianpeng" <majianpeng@gmail.com> wrote:
> I created a raid5 using three disks and disk0 add bad blocks.I set faulty disk2 and remov disk2 and readd disk2.
> It seems to recover well and set disk2 badblocks as disk0.
> But the md0_resync repeatly stop and start.
> The recovery_start of disk2 all the same .
>
Thanks for the extra details (and sorry for the delay in replying).
There certainly is something wrong with handling bad blocks during recovery.
I think this patch should fix it. Are you able to test it and confirm?
Thanks,
NeilBrown
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 23ac880..2186e0e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0;
int i;
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0;
s->replacing = 0;
/* There is nothing more to do for sync/check/repair.
+ * Don't even need to abort as that is handled elsewhere
+ * if needed, and not always wanted e.g. if there is a known
+ * bad block here.
* For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery
*/
- if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
- return;
- /* During recovery devices cannot be removed, so locking and
- * refcounting of rdevs is not needed
- */
- for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = conf->disks[i].rdev;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- rdev = conf->disks[i].replacement;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- }
- if (abort) {
- conf->recovery_disabled = conf->mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+ /* During recovery devices cannot be removed, so
+ * locking and refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ struct md_rdev *rdev = conf->disks[i].rdev;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ rdev = conf->disks[i].replacement;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort)
+ conf->recovery_disabled =
+ conf->mddev->recovery_disabled;
}
+ md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
}
static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */;
else if (is_bad) {
/* also not in-sync */
- if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+ test_bit(R5_UPTODATE, &sh->devs[i].flags)) {
/* treat as in-sync, but with a read error
* which we can now try to correct
*/
next prev parent reply other threads:[~2012-03-27 7:33 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-14 7:07 md/raid5:Fix recover/replace stop if handle stipe failed majianpeng
2012-03-14 7:33 ` NeilBrown
2012-03-14 9:27 ` majianpeng
2012-03-27 3:26 ` NeilBrown
2012-03-27 7:33 ` majianpeng [this message]
2012-03-28 1:40 ` NeilBrown
2012-03-28 1:45 ` majianpeng
2012-03-28 2:14 ` NeilBrown
2012-03-28 2:25 ` majianpeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201203271533149067900@gmail.com \
--to=majianpeng@gmail.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).