linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 06/16] md/raid5: allow each slot to have an extra replacement device
Date: Wed, 26 Oct 2011 12:43:01 +1100	[thread overview]
Message-ID: <20111026014301.21110.82947.stgit@notabene.brown> (raw)
In-Reply-To: <20111026014240.21110.28487.stgit@notabene.brown>

Just enhance data structures to record a second device per slot to be
used as a 'replacement' device, replacing the original.
We also have a second bio in each slot in each stripe_head.  This will
only be used when writing to the array - we need to write to both the
original and the replacement at the same time, so will need two bios.

For now, only try using the replacement drive for aligned-reads.
In this case, we prefer the replacement if it has been recovered far
enough, otherwise use the original.

This includes a small enhancement.  Previously we would only do
aligned reads if the target device was fully recovered.  Now we also
do them if it has recovered far enough.

Signed-off-by: NeilBrown <neilb@suse.de>
---

 drivers/md/raid5.c |   15 ++++++++++++--
 drivers/md/raid5.h |   57 ++++++++++++++++++++++++++++++----------------------
 2 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 27056d3..e6d10df 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3573,6 +3573,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	int dd_idx;
 	struct bio* align_bi;
 	struct md_rdev *rdev;
+	sector_t end_sector;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
 		pr_debug("chunk_aligned_read : non aligned\n");
@@ -3597,9 +3598,19 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 						    0,
 						    &dd_idx, NULL);
 
+	end_sector = align_bi->bi_sector + (align_bi->bi_size >>9);
 	rcu_read_lock();
-	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
-	if (rdev && test_bit(In_sync, &rdev->flags)) {
+	rdev = rcu_dereference(conf->disks[dd_idx].replacement);
+	if (!rdev || test_bit(Faulty, &rdev->flags) ||
+	    rdev->recovery_offset < end_sector) {
+		rdev = rcu_dereference(conf->disks[dd_idx].rdev);
+		if (rdev &&
+		    (test_bit(Faulty, &rdev->flags) ||
+		    !(test_bit(In_sync, &rdev->flags) ||
+		      rdev->recovery_offset >= end_sector)))
+			rdev = NULL;
+	}
+	if (rdev) {
 		sector_t first_bad;
 		int bad_sectors;
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index e10c553..43106f0 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -226,8 +226,11 @@ struct stripe_head {
 		#endif
 	} ops;
 	struct r5dev {
-		struct bio	req;
-		struct bio_vec	vec;
+		/* rreq and rvec are used for the replacement device when
+		 * writing data to both devices.
+		 */
+		struct bio	req, rreq;
+		struct bio_vec	vec, rvec;
 		struct page	*page;
 		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;			/* sector of this page */
@@ -252,29 +255,35 @@ struct stripe_head_state {
 	int handle_bad_blocks;
 };
 
-/* Flags */
-#define	R5_UPTODATE	0	/* page contains current data */
-#define	R5_LOCKED	1	/* IO has been submitted on "req" */
-#define	R5_OVERWRITE	2	/* towrite covers whole page */
+/* Flags for struct r5dev.flags */
+enum r5dev_flags {
+	R5_UPTODATE,	/* page contains current data */
+	R5_LOCKED,	/* IO has been submitted on "req" */
+	R5_OVERWRITE,	/* towrite covers whole page */
 /* and some that are internal to handle_stripe */
-#define	R5_Insync	3	/* rdev && rdev->in_sync at start */
-#define	R5_Wantread	4	/* want to schedule a read */
-#define	R5_Wantwrite	5
-#define	R5_Overlap	7	/* There is a pending overlapping request on this block */
-#define	R5_ReadError	8	/* seen a read error here recently */
-#define	R5_ReWrite	9	/* have tried to over-write the readerror */
+	R5_Insync,	/* rdev && rdev->in_sync at start */
+	R5_Wantread,	/* want to schedule a read */
+	R5_Wantwrite,
+	R5_Overlap,	/* There is a pending overlapping request
+			 * on this block */
+	R5_ReadError,	/* seen a read error here recently */
+	R5_ReWrite,	/* have tried to over-write the readerror */
 
-#define	R5_Expanded	10	/* This block now has post-expand data */
-#define	R5_Wantcompute	11	/* compute_block in progress treat as
-				 * uptodate
-				 */
-#define	R5_Wantfill	12	/* dev->toread contains a bio that needs
-				 * filling
-				 */
-#define	R5_Wantdrain	13	/* dev->towrite needs to be drained */
-#define	R5_WantFUA	14	/* Write should be FUA */
-#define	R5_WriteError	15	/* got a write error - need to record it */
-#define	R5_MadeGood	16	/* A bad block has been fixed by writing to it*/
+	R5_Expanded,	/* This block now has post-expand data */
+	R5_Wantcompute,	/* compute_block in progress treat as
+			 * uptodate
+			 */
+	R5_Wantfill,	/* dev->toread contains a bio that needs
+			 * filling
+			 */
+	R5_Wantdrain,	/* dev->towrite needs to be drained */
+	R5_WantFUA,	/* Write should be FUA */
+	R5_WriteError,	/* got a write error - need to record it */
+	R5_MadeGood,	/* A bad block has been fixed by writing to it */
+	R5_ReadRepl,	/* Will/did read from replacement rather than orig */
+	R5_MadeGoodRepl,/* A bad block on the replacement device has been
+			 * fixed by writing to it */
+};
 /*
  * Write method
  */
@@ -344,7 +353,7 @@ enum {
 
 
 struct disk_info {
-	struct md_rdev	*rdev;
+	struct md_rdev	*rdev, *replacement;
 };
 
 struct r5conf {



  parent reply	other threads:[~2011-10-26  1:43 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-10-26  1:43 [md PATCH 00/16] hot-replace support for RAID4/5/6 NeilBrown
2011-10-26  1:43 ` [md PATCH 03/16] md: remove test for duplicate device when setting slot number NeilBrown
2011-10-26  1:43 ` [md PATCH 01/16] md: refine interpretation of "hold_active == UNTIL_IOCTL" NeilBrown
2011-10-26  1:43 ` [md PATCH 02/16] md: take after reference to mddev during sysfs access NeilBrown
2011-10-26  1:43 ` [md PATCH 04/16] md: change hot_remove_disk to take an rdev rather than a number NeilBrown
2011-10-26  1:43 ` [md PATCH 08/16] md/raid5: remove redundant bio initialisations NeilBrown
2011-10-26  1:43 ` [md PATCH 11/16] md/raid5: writes should get directed to replacement as well as original NeilBrown
2011-10-26  1:43 ` [md PATCH 13/16] md/raid5: handle activation of replacement device when recovery completes NeilBrown
2011-10-26  1:43 ` [md PATCH 05/16] md: create externally visible flags for supporting hot-replace NeilBrown
2011-10-26  1:43 ` NeilBrown [this message]
2011-10-26  1:43 ` [md PATCH 10/16] md/raid5: allow removal for failed replacement devices NeilBrown
2011-10-26  1:43 ` [md PATCH 12/16] md/raid5: detect and handle replacements during recovery NeilBrown
2011-10-26  1:43 ` [md PATCH 09/16] md/raid5: preferentially read from replacement device if possible NeilBrown
2011-10-26  1:43 ` [md PATCH 07/16] md/raid5: raid5.h cleanup NeilBrown
2011-10-26  1:43 ` [md PATCH 14/16] md/raid5: recognise replacements when assembling array NeilBrown
2011-10-26  1:43 ` [md PATCH 15/16] md/raid5: If there is a spare and a replaceable device, start replacement NeilBrown
2011-10-26  1:43 ` [md PATCH 16/16] md/raid5: Mark device replaceable when we see a write error NeilBrown
2011-10-26  6:38 ` [md PATCH 00/16] hot-replace support for RAID4/5/6 David Brown
2011-10-26  7:42   ` NeilBrown
2011-10-26  9:01   ` John Robinson
2011-10-26 13:57     ` Peter W. Morreale
2011-10-26 17:27       ` Piergiorgio Sartor
2011-10-27 17:10 ` Peter W. Morreale
2011-10-27 20:44   ` NeilBrown
2011-10-27 20:53     ` Peter W. Morreale
2011-12-14 22:18 ` Dan Williams
2011-12-15  6:18   ` NeilBrown
2011-12-15  7:14     ` Williams, Dan J
2011-12-20  5:18       ` NeilBrown
2011-12-22 20:54         ` Alexander Kühn
2011-12-22 21:14           ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111026014301.21110.82947.stgit@notabene.brown \
    --to=neilb@suse.de \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).