From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 06/17] md/raid10: Handle replacement devices during resync.
Date: Wed, 02 Nov 2011 16:25:44 +1100 [thread overview]
Message-ID: <20111102052544.17566.72926.stgit@notabene.brown> (raw)
In-Reply-To: <20111102051851.17566.52748.stgit@notabene.brown>
If we need to resync an array which has replacement devices,
we always write every block that is checked to every replacement.
If the resync was a bitmap-based resync, we will then complete the
replacement normally.
If it was a full resync, we mark the replacements as fully recovered
when the resync finishes, so no further recovery is needed.
Signed-off-by: NeilBrown <neilb@suse.de>
---
drivers/md/raid10.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0db0402..a2341ca 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1596,19 +1596,29 @@ static void end_sync_write(struct bio *bio, int error)
sector_t first_bad;
int bad_sectors;
int slot;
+ int repl;
+ struct md_rdev *rdev;
- d = find_bio_disk(conf, r10_bio, bio, &slot, NULL);
+ d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
+ if (repl)
+ rdev = conf->mirrors[d].replacement;
+ else
+ rdev = conf->mirrors[d].rdev;
if (!uptodate) {
- set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
- set_bit(R10BIO_WriteError, &r10_bio->state);
- } else if (is_badblock(conf->mirrors[d].rdev,
+ if (repl)
+ md_error(mddev, rdev);
+ else {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ }
+ } else if (is_badblock(rdev,
r10_bio->devs[slot].addr,
r10_bio->sectors,
&first_bad, &bad_sectors))
set_bit(R10BIO_MadeGood, &r10_bio->state);
- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ rdev_dec_pending(rdev, mddev);
end_sync_request(r10_bio);
}
@@ -1712,6 +1722,29 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
generic_make_request(tbio);
}
+ /* Now write out to any replacement devices
+ * that are active
+ */
+ for (i = 0; i < conf->copies; i++) {
+ int j, d;
+ int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
+
+ tbio = r10_bio->devs[i].repl_bio;
+ if (!tbio || !tbio->bi_end_io)
+ continue;
+ if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
+ && r10_bio->devs[i].bio != fbio)
+ for (j = 0; j < vcnt; j++)
+ memcpy(page_address(tbio->bi_io_vec[j].bv_page),
+ page_address(fbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE);
+ d = r10_bio->devs[i].devnum;
+ atomic_inc(&r10_bio->remaining);
+ md_sync_acct(conf->mirrors[d].replacement->bdev,
+ tbio->bi_size >> 9);
+ generic_make_request(tbio);
+ }
+
done:
if (atomic_dec_and_test(&r10_bio->remaining)) {
md_done_sync(mddev, r10_bio->sectors, 1);
@@ -2289,6 +2322,22 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->sectors, 0))
md_error(conf->mddev, rdev);
}
+ rdev = conf->mirrors[dev].replacement;
+ if (r10_bio->devs[m].repl_bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
+ &r10_bio->devs[m].repl_bio->bi_flags)) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
+ }
}
put_buf(r10_bio);
} else {
@@ -2471,9 +2520,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bitmap_end_sync(mddev->bitmap, sect,
&sync_blocks, 1);
}
- } else /* completed sync */
+ } else {
+ /* completed sync */
+ if ((!mddev->bitmap || conf->fullsync)
+ && conf->have_replacement
+ && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+ /* Completed a full sync so the replacements
+ * are now fully recovered.
+ */
+ for (i = 0; i < conf->raid_disks; i++)
+ if (conf->mirrors[i].replacement)
+ conf->mirrors[i].replacement
+ ->recovery_offset
+ = MaxSector;
+ }
conf->fullsync = 0;
-
+ }
bitmap_close_sync(mddev->bitmap);
close_sync(conf);
*skipped = 1;
@@ -2721,6 +2783,9 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t first_bad, sector;
int bad_sectors;
+ if (r10_bio->devs[i].repl_bio)
+ r10_bio->devs[i].repl_bio->bi_end_io = NULL;
+
bio = r10_bio->devs[i].bio;
bio->bi_end_io = NULL;
clear_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -2751,6 +2816,27 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
count++;
+
+ if (conf->mirrors[d].replacement == NULL ||
+ test_bit(Faulty,
+ &conf->mirrors[d].replacement->flags))
+ continue;
+
+ /* Need to set up for writing to the replacement */
+ bio = r10_bio->devs[i].repl_bio;
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+ sector = r10_bio->devs[i].addr;
+ atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_write;
+ bio->bi_rw = WRITE;
+ bio->bi_sector = sector +
+ conf->mirrors[d].replacement->data_offset;
+ bio->bi_bdev = conf->mirrors[d].replacement->bdev;
+ count++;
}
if (count < 2) {
@@ -2759,6 +2845,11 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
if (r10_bio->devs[i].bio->bi_end_io)
rdev_dec_pending(conf->mirrors[d].rdev,
mddev);
+ if (r10_bio->devs[i].repl_bio &&
+ r10_bio->devs[i].repl_bio->bi_end_io)
+ rdev_dec_pending(
+ conf->mirrors[d].replacement,
+ mddev);
}
put_buf(r10_bio);
biolist = NULL;
next prev parent reply other threads:[~2011-11-02 5:25 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-02 5:25 [md PATCH 00/17] hot-replace support for RAID1 and RAID10 NeilBrown
2011-11-02 5:25 ` [md PATCH 01/17] md/raid10: prepare data structures for handling replacement NeilBrown
2011-11-02 5:25 ` [md PATCH 04/17] md/raid10: allow removal of failed replacement devices NeilBrown
2011-11-02 5:25 ` [md PATCH 02/17] md/raid10: change read_balance to return an rdev NeilBrown
2011-11-02 5:25 ` [md PATCH 03/17] md/raid10: preferentially read from replacement device if possible NeilBrown
2011-11-02 5:25 ` [md PATCH 07/17] md/raid10: handle recovery of replacement devices NeilBrown
2011-11-02 5:25 ` [md PATCH 12/17] md/raid1: Allocate spare to store replacement devices and their bios NeilBrown
2011-11-02 5:25 ` [md PATCH 15/17] md/raid1: recognise replacements when assembling arrays NeilBrown
2011-11-02 5:25 ` NeilBrown [this message]
2011-11-02 5:25 ` [md PATCH 10/17] md/raid10: If there is a spare and a replaceable device, start replacement NeilBrown
2011-11-02 5:25 ` [md PATCH 13/17] md/raid1: Allow a failed replacement device to be removed NeilBrown
2011-11-02 5:25 ` [md PATCH 05/17] md/raid10: writes should get directed to replacement as well as original NeilBrown
2011-11-02 5:25 ` [md PATCH 08/17] md/raid10: Allow replacement device to be replace old drive NeilBrown
2011-11-02 5:25 ` [md PATCH 11/17] md/raid1: Replace use of mddev->raid_disks with conf->raid_disks NeilBrown
2011-11-02 5:25 ` [md PATCH 09/17] md/raid10: recognise replacements when assembling array NeilBrown
2011-11-02 5:25 ` [md PATCH 14/17] md/raid1: handle activation of replacement device when recovery completes NeilBrown
2011-11-02 5:25 ` [md PATCH 16/17] md/raid1: If there is a spare and a replaceable device, start replacement NeilBrown
2011-11-02 5:25 ` [md PATCH 17/17] md/raid1: Mark device replaceable when we see a write error NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111102052544.17566.72926.stgit@notabene.brown \
--to=neilb@suse.de \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).