[md PATCH 08/17] md/raid10: Allow replacement device to replace old drive.

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 08/17] md/raid10: Allow replacement device to replace old drive.
Date: Wed, 02 Nov 2011 16:25:44 +1100	[thread overview]
Message-ID: <20111102052544.17566.55334.stgit@notabene.brown> (raw)
In-Reply-To: <20111102051851.17566.52748.stgit@notabene.brown>

When recovery finishes and spare_active is called, check for a
replacement that might have just become fully synced and mark it
as such, marking the original as failed.

Then when the original is removed, move the replacement into
its position.

This means that 'replacement' can spontaneously become NULL in some
situations.  Make sure we check for those.
It also means that 'rdev' and 'replacement' could appear to be
identical - check for that too.

Signed-off-by: NeilBrown <neilb@suse.de>
---

 drivers/md/raid10.c |   72 +++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 89de485..fd28f03 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -396,14 +396,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	int dec_rdev = 1;
 	struct r10conf *conf = r10_bio->mddev->private;
 	int slot, repl;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = NULL;
 
 	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
 	if (repl)
 		rdev = conf->mirrors[dev].replacement;
-	else
+	if (!rdev) {
+		smp_rmb();
+		repl = 0;
 		rdev = conf->mirrors[dev].rdev;
+	}
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
@@ -1090,6 +1093,8 @@ retry_write:
 		struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
 		struct md_rdev *rrdev = rcu_dereference(
 			conf->mirrors[d].replacement);
+		if (rdev == rrdev)
+			rrdev = NULL;
 		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
 			atomic_inc(&rdev->nr_pending);
 			blocked_rdev = rdev;
@@ -1171,9 +1176,15 @@ retry_write:
 				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 			}
 			if (r10_bio->devs[j].repl_bio) {
+				struct md_rdev *rdev;
 				d = r10_bio->devs[j].devnum;
-				rdev_dec_pending(
-					conf->mirrors[d].replacement, mddev);
+				rdev = conf->mirrors[d].replacement;
+				if (!rdev) {
+					/* Race with remove_disk */
+					smp_mb();
+					rdev = conf->mirrors[d].rdev;
+				}
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 		allow_barrier(conf);
@@ -1231,6 +1242,10 @@ retry_write:
 			    max_sectors);
 		r10_bio->devs[i].repl_bio = mbio;
 
+		/* We are actively writing to the original device
+		 * so it cannot disappear, so the replacement cannot
+		 * become NULL here
+		 */
 		mbio->bi_sector	= (r10_bio->devs[i].addr+
 				   conf->mirrors[d].replacement->data_offset);
 		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
@@ -1406,9 +1421,27 @@ static int raid10_spare_active(struct mddev *mddev)
 	 */
 	for (i = 0; i < conf->raid_disks; i++) {
 		tmp = conf->mirrors + i;
-		if (tmp->rdev
-		    && !test_bit(Faulty, &tmp->rdev->flags)
-		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+		if (tmp->replacement
+		    && tmp->replacement->recovery_offset == MaxSector
+		    && !test_bit(Faulty, &tmp->replacement->flags)
+		    && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+			/* Replacement has just become active */
+			if (!tmp->rdev
+			    || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+				count++;
+			if (tmp->rdev) {
+				/* Replaced device not technically faulty,
+				 * but we need to be sure it gets removed
+				 * and never re-added.
+				 */
+				set_bit(Faulty, &tmp->rdev->flags);
+				sysfs_notify_dirent_safe(
+					tmp->rdev->sysfs_state);
+			}
+			sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
+		} else if (tmp->rdev
+			   && !test_bit(Faulty, &tmp->rdev->flags)
+			   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
 			count++;
 			sysfs_notify_dirent(tmp->rdev->sysfs_state);
 		}
@@ -1508,6 +1541,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	 */
 	if (!test_bit(Faulty, &rdev->flags) &&
 	    mddev->recovery_disabled != p->recovery_disabled &&
+	    (!p->replacement || p->replacement == rdev) &&
 	    enough(conf, -1)) {
 		err = -EBUSY;
 		goto abort;
@@ -1519,7 +1553,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		err = -EBUSY;
 		*rdevp = rdev;
 		goto abort;
-	}
+	} else if (p->replacement) {
+		/* We must have just cleared 'rdev' */
+		p->rdev = p->replacement;
+		clear_bit(Replacement, &p->replacement->flags);
+		smp_mb(); /* Make sure other CPUs may see both as identical
+			   * but will never see neither -- if they are careful.
+			   */
+		p->replacement = NULL;
+		clear_bit(Replaceable, &rdev->flags);
+	} else
+		/* We might have just remove the Replacement as faulty
+		 * Clear the flag just in case
+		 */
+		clear_bit(Replaceable, &rdev->flags);
+
 	err = md_integrity_register(mddev);
 
 abort:
@@ -1597,13 +1645,15 @@ static void end_sync_write(struct bio *bio, int error)
 	int bad_sectors;
 	int slot;
 	int repl;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = NULL;
 
 	d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 	if (repl)
 		rdev = conf->mirrors[d].replacement;
-	else
+	if (!rdev) {
+		smp_mb();
 		rdev = conf->mirrors[d].rdev;
+	}
 
 	if (!uptodate) {
 		if (repl)
@@ -2370,7 +2420,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			}
 			bio = r10_bio->devs[m].repl_bio;
 			rdev = conf->mirrors[dev].replacement;
-			if (bio == IO_MADE_GOOD) {
+			if (rdev && bio == IO_MADE_GOOD) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,

next prev parent reply	other threads:[~2011-11-02  5:25 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-02  5:25 [md PATCH 00/17] hot-replace support for RAID1 and RAID10 NeilBrown
2011-11-02  5:25 ` [md PATCH 03/17] md/raid10: preferentially read from replacement device if possible NeilBrown
2011-11-02  5:25 ` [md PATCH 02/17] md/raid10: change read_balance to return an rdev NeilBrown
2011-11-02  5:25 ` [md PATCH 04/17] md/raid10: allow removal of failed replacement devices NeilBrown
2011-11-02  5:25 ` [md PATCH 01/17] md/raid10: prepare data structures for handling replacement NeilBrown
2011-11-02  5:25 ` [md PATCH 05/17] md/raid10: writes should get directed to replacement as well as original NeilBrown
2011-11-02  5:25 ` [md PATCH 14/17] md/raid1: handle activation of replacement device when recovery completes NeilBrown
2011-11-02  5:25 ` [md PATCH 09/17] md/raid10: recognise replacements when assembling array NeilBrown
2011-11-02  5:25 ` [md PATCH 11/17] md/raid1: Replace use of mddev->raid_disks with conf->raid_disks NeilBrown
2011-11-02  5:25 ` NeilBrown [this message]
2011-11-02  5:25 ` [md PATCH 12/17] md/raid1: Allocate spare to store replacement devices and their bios NeilBrown
2011-11-02  5:25 ` [md PATCH 07/17] md/raid10: handle recovery of replacement devices NeilBrown
2011-11-02  5:25 ` [md PATCH 13/17] md/raid1: Allow a failed replacement device to be removed NeilBrown
2011-11-02  5:25 ` [md PATCH 10/17] md/raid10: If there is a spare and a replaceable device, start replacement NeilBrown
2011-11-02  5:25 ` [md PATCH 06/17] md/raid10: Handle replacement devices during resync NeilBrown
2011-11-02  5:25 ` [md PATCH 15/17] md/raid1: recognise replacements when assembling arrays NeilBrown
2011-11-02  5:25 ` [md PATCH 16/17] md/raid1: If there is a spare and a replaceable device, start replacement NeilBrown
2011-11-02  5:25 ` [md PATCH 17/17] md/raid1: Mark device replaceable when we see a write error NeilBrown

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:89de485 dfblob:fd28f03 )
 OR (
bs:"[md PATCH 08/17] md/raid10: Allow replacement device to be replace old drive." )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111102052544.17566.55334.stgit@notabene.brown \
    --to=neilb@suse.de \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).