All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-raid@vger.kernel.org
Subject: [PATCH RFC 1/4] md: introduce struct stripe_head_state
Date: Tue, 10 Apr 2007 23:00:26 -0700	[thread overview]
Message-ID: <20070411060026.15745.84714.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070411055729.15745.51513.stgit@dwillia2-linux.ch.intel.com>

struct stripe_head_state collects all the dynamic stripe-state information
that is calculated/tracked during calls to handle_stripe.  This enables a
mechanism for handle_stripe functionality to be broken off into
subroutines.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c         |  280 ++++++++++++++++++++++----------------------
 include/linux/raid/raid5.h |   11 ++
 2 files changed, 153 insertions(+), 138 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 74ce354..684552a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1872,12 +1872,14 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
-	int syncing, expanding, expanded;
-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
-	int to_fill=0, compute=0, req_compute=0, non_overwrite=0;
-	int failed_num=0;
+	struct stripe_head_state s = {
+		.locked=0, .uptodate=0, .to_read=0, .to_write=0, .failed=0,
+		.written=0, .to_fill=0, .compute=0, .req_compute=0,
+		.non_overwrite=0,
+	};
 	struct r5dev *dev;
 	unsigned long pending=0;
+	s.failed_num=0;
 
 	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d ops=%lx:%lx:%lx\n",
 	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
@@ -1887,9 +1889,9 @@ static void handle_stripe5(struct stripe_head *sh)
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
 	rcu_read_lock();
@@ -1911,22 +1913,22 @@ static void handle_stripe5(struct stripe_head *sh)
 			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
+		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
 
 		if (test_bit(R5_Wantfill, &dev->flags))
-			to_fill++;
+			s.to_fill++;
 		else if (dev->toread)
-			to_read++;
+			s.to_read++;
 
-		if (test_bit(R5_Wantcompute, &dev->flags)) BUG_ON(++compute > 1);
+		if (test_bit(R5_Wantcompute, &dev->flags)) BUG_ON(++s.compute > 1);
 
 		if (dev->towrite) {
-			to_write++;
+			s.to_write++;
 			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				non_overwrite++;
+				s.non_overwrite++;
 		}
-		if (dev->written) written++;
+		if (dev->written) s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
@@ -1935,23 +1937,24 @@ static void handle_stripe5(struct stripe_head *sh)
 		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)
 		    || test_bit(R5_ReadError, &dev->flags)) {
-			failed++;
-			failed_num = i;
+			s.failed++;
+			s.failed_num = i;
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
 	rcu_read_unlock();
 
-	if (to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
+	if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
 		sh->ops.count++;
 
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 		" to_write=%d to_fill=%d failed=%d failed_num=%d\n",
-		locked, uptodate, to_read, to_write, to_fill, failed, failed_num);
+		s.locked, s.uptodate, s.to_read, s.to_write, s.to_fill,
+		s.failed, s.failed_num);
 	/* check if the array has lost two devices and, if so, some requests might
 	 * need to be failed
 	 */
-	if (failed > 1 && to_read+to_write+written) {
+	if (s.failed > 1 && s.to_read+s.to_write+s.written) {
 		for (i=disks; i--; ) {
 			int bitmap_end = 0;
 
@@ -1969,7 +1972,7 @@ static void handle_stripe5(struct stripe_head *sh)
 			/* fail all writes first */
 			bi = sh->dev[i].towrite;
 			sh->dev[i].towrite = NULL;
-			if (bi) { to_write--; bitmap_end = 1; }
+			if (bi) { s.to_write--; bitmap_end = 1; }
 
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
@@ -2009,7 +2012,7 @@ static void handle_stripe5(struct stripe_head *sh)
 				sh->dev[i].toread = NULL;
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
-				if (bi) to_read--;
+				if (bi) s.to_read--;
 				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
 					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 					clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2026,20 +2029,20 @@ static void handle_stripe5(struct stripe_head *sh)
 						STRIPE_SECTORS, 0, 0);
 		}
 	}
-	if (failed > 1 && syncing) {
+	if (s.failed > 1 && s.syncing) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
 		clear_bit(STRIPE_SYNCING, &sh->state);
-		syncing = 0;
+		s.syncing = 0;
 	}
 
 	/* might be able to return some write requests if the parity block
 	 * is safe, or on a failed drive
 	 */
 	dev = &sh->dev[sh->pd_idx];
-	if ( written &&
+	if ( s.written &&
 	     ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
 		test_bit(R5_UPTODATE, &dev->flags))
-	       || (failed == 1 && failed_num == sh->pd_idx))
+	       || (s.failed == 1 && s.failed_num == sh->pd_idx))
 	    ) {
 	    /* any written block on an uptodate or failed drive can be returned.
 	     * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 
@@ -2081,8 +2084,8 @@ static void handle_stripe5(struct stripe_head *sh)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (syncing && (uptodate + compute < disks)) || expanding ||
-		test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
+	if (s.to_read || s.non_overwrite || (s.syncing && (s.uptodate + s.compute < disks)) ||
+		s.expanding || test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 
 		/* Clear completed compute operations.  Parity recovery
 		 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
@@ -2114,11 +2117,11 @@ static void handle_stripe5(struct stripe_head *sh)
 				if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 				     (dev->toread ||
 				     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-				     syncing ||
-				     expanding ||
-				     (failed && (sh->dev[failed_num].toread ||
-						 (sh->dev[failed_num].towrite &&
-						 	!test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
+				     s.syncing ||
+				     s.expanding ||
+				     (s.failed && (sh->dev[s.failed_num].toread ||
+						 (sh->dev[s.failed_num].towrite &&
+						 	!test_bit(R5_OVERWRITE, &sh->dev[s.failed_num].flags))))
 					    )
 					) {
 					/* 1/ We would like to get this block, possibly
@@ -2132,20 +2135,20 @@ static void handle_stripe5(struct stripe_head *sh)
 					 * 3/ We hold off parity block re-reads until check
 					 * operations have quiesced.
 					 */
-					if ((uptodate == disks-1) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+					if ((s.uptodate == disks-1) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 						set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
 						set_bit(R5_Wantcompute, &dev->flags);
 						sh->ops.target = i;
-						req_compute = 1;
+						s.req_compute = 1;
 						sh->ops.count++;
 						/* Careful: from this point on 'uptodate' is in the eye of
 						 * raid5_run_ops which services 'compute' operations before
 						 * writes. R5_Wantcompute flags a block that will be R5_UPTODATE
 						 * by the time it is needed for a subsequent operation.
 						 */
-						uptodate++;
+						s.uptodate++;
 						break; /* uptodate + compute == disks */
-					} else if ((uptodate < disks-1) && test_bit(R5_Insync, &dev->flags)) {
+					} else if ((s.uptodate < disks-1) && test_bit(R5_Insync, &dev->flags)) {
 						/* Note: we hold off compute operations while checks are in flight,
 						 * but we still prefer 'compute' over 'read' hence we only read if
 						 * (uptodate < disks-1)
@@ -2154,9 +2157,9 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 						PRINTK("Reading block %d (sync=%d)\n",
-							i, syncing);
+							i, s.syncing);
 					}
 				}
 			}
@@ -2207,7 +2210,7 @@ static void handle_stripe5(struct stripe_head *sh)
 				if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 					sh->ops.count++;
 				if (!test_bit(R5_Insync, &dev->flags)
-				    || (i==sh->pd_idx && failed == 0))
+				    || (i==sh->pd_idx && s.failed == 0))
 					set_bit(STRIPE_INSYNC, &sh->state);
 			}
 		}
@@ -2223,7 +2226,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	 *    a check is in flight
 	 * 3/ Write operations do not stack
 	 */
-	if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+	if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
 		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 		int rmw=0, rcw=0;
 		for (i=disks ; i--;) {
@@ -2266,7 +2269,7 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 					} else {
 						set_bit(STRIPE_DELAYED, &sh->state);
 						set_bit(STRIPE_HANDLE, &sh->state);
@@ -2288,7 +2291,7 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 					} else {
 						set_bit(STRIPE_DELAYED, &sh->state);
 						set_bit(STRIPE_HANDLE, &sh->state);
@@ -2303,10 +2306,10 @@ static void handle_stripe5(struct stripe_head *sh)
 		 * is not the case then new writes need to be held off until the compute
 		 * completes.
 		 */
-		if ((req_compute || !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
-			(locked == 0 && (rcw == 0 ||rmw == 0) &&
+		if ((s.req_compute || !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
+			(s.locked == 0 && (rcw == 0 ||rmw == 0) &&
 			!test_bit(STRIPE_BIT_DELAY, &sh->state)))
-			locked += handle_write_operations5(sh, rcw == 0, 0);
+			s.locked += handle_write_operations5(sh, rcw == 0, 0);
 	}
 
 	/* 1/ Maybe we need to check and possibly fix the parity for this stripe.
@@ -2315,7 +2318,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	 * 2/ Hold off parity checks while parity dependent operations are in flight
 	 *    (conflicting writes are protected by the 'locked' variable)
 	 */
-	if ((syncing && locked == 0 && !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
+	if ((s.syncing && s.locked == 0 && !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
 		!test_bit(STRIPE_INSYNC, &sh->state)) ||
 	    	test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
 	    	test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
@@ -2327,12 +2330,12 @@ static void handle_stripe5(struct stripe_head *sh)
 		 * 3/ skip to the writeback section if we previously
 		 *    initiated a recovery operation
 		 */
-		if (failed == 0 && !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+		if (s.failed == 0 && !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
 			if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
-				BUG_ON(uptodate != disks);
+				BUG_ON(s.uptodate != disks);
 				clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
 				sh->ops.count++;
-				uptodate--;
+				s.uptodate--;
 			} else if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
 				clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
 				clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
@@ -2354,7 +2357,7 @@ static void handle_stripe5(struct stripe_head *sh)
 							&sh->dev[sh->pd_idx].flags);
 						sh->ops.target = sh->pd_idx;
 						sh->ops.count++;
-						uptodate++;
+						s.uptodate++;
 					}
 				}
 			}
@@ -2378,22 +2381,22 @@ static void handle_stripe5(struct stripe_head *sh)
 			!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 
 			/* either failed parity check, or recovery is happening */
-			if (failed==0)
-				failed_num = sh->pd_idx;
-			dev = &sh->dev[failed_num];
+			if (s.failed==0)
+				s.failed_num = sh->pd_idx;
+			dev = &sh->dev[s.failed_num];
 			BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
-			BUG_ON(uptodate != disks);
+			BUG_ON(s.uptodate != disks);
 
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			clear_bit(STRIPE_DEGRADED, &sh->state);
-			locked++;
+			s.locked++;
 			set_bit(STRIPE_INSYNC, &sh->state);
 		}
 	}
-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 	}
@@ -2401,26 +2404,26 @@ static void handle_stripe5(struct stripe_head *sh)
 	/* If the failed drive is just a ReadError, then we might need to progress
 	 * the repair/check process
 	 */
-	if (failed == 1 && ! conf->mddev->ro &&
-	    test_bit(R5_ReadError, &sh->dev[failed_num].flags)
-	    && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
-	    && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
+	if (s.failed == 1 && ! conf->mddev->ro &&
+	    test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
+	    && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
+	    && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
 		) {
-		dev = &sh->dev[failed_num];
+		dev = &sh->dev[s.failed_num];
 		if (!test_bit(R5_ReWrite, &dev->flags)) {
 			set_bit(R5_Wantwrite, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			set_bit(R5_ReWrite, &dev->flags);
 			set_bit(R5_LOCKED, &dev->flags);
-			locked++;
+			s.locked++;
 		} else {
 			/* let's read it back */
 			set_bit(R5_Wantread, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			set_bit(R5_LOCKED, &dev->flags);
-			locked++;
+			s.locked++;
 		}
 	}
 
@@ -2443,20 +2446,20 @@ static void handle_stripe5(struct stripe_head *sh)
 		}
 	}
 
-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
 		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
 		/* Need to write out all blocks after computing parity */
 		sh->disks = conf->raid_disks;
 		sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
-		locked += handle_write_operations5(sh, 0, 1);
-	} else if (expanded && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+		s.locked += handle_write_operations5(sh, 0, 1);
+	} else if (s.expanded && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 	}
 
-	if (expanding && locked == 0) {
+	if (s.expanding && s.locked == 0) {
 		/* We have read all the blocks in this stripe and now we need to
 		 * copy some of them into a target stripe for expand.
 		 */
@@ -2537,14 +2540,15 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
-	int syncing, expanding, expanded;
-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
-	int non_overwrite = 0;
-	int failed_num[2] = {0, 0};
+	struct stripe_head_state s = {
+		.locked=0, .uptodate=0, .to_read=0, .to_write=0, .failed=0,
+		.written=0, .non_overwrite = 0,
+	};
 	struct r5dev *dev, *pdev, *qdev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = raid6_next_disk(pd_idx, disks);
 	int p_failed, q_failed;
+	s.r6_failed_num[0] = s.r6_failed_num[1] = 0;
 
 	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
 	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
@@ -2554,9 +2558,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
 	rcu_read_lock();
@@ -2591,17 +2595,17 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 
 		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
+		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
 
 
-		if (dev->toread) to_read++;
+		if (dev->toread) s.to_read++;
 		if (dev->towrite) {
-			to_write++;
+			s.to_write++;
 			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				non_overwrite++;
+				s.non_overwrite++;
 		}
-		if (dev->written) written++;
+		if (dev->written) s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
@@ -2610,21 +2614,21 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)
 		    || test_bit(R5_ReadError, &dev->flags)) {
-			if ( failed < 2 )
-				failed_num[failed] = i;
-			failed++;
+			if ( s.failed < 2 )
+				s.r6_failed_num[s.failed] = i;
+			s.failed++;
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
 	rcu_read_unlock();
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
-	       locked, uptodate, to_read, to_write, failed,
-	       failed_num[0], failed_num[1]);
+	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
+	       s.r6_failed_num[0], s.r6_failed_num[1]);
 	/* check if the array has lost >2 devices and, if so, some requests might
 	 * need to be failed
 	 */
-	if (failed > 2 && to_read+to_write+written) {
+	if (s.failed > 2 && s.to_read+s.to_write+s.written) {
 		for (i=disks; i--; ) {
 			int bitmap_end = 0;
 
@@ -2642,7 +2646,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			/* fail all writes first */
 			bi = sh->dev[i].towrite;
 			sh->dev[i].towrite = NULL;
-			if (bi) { to_write--; bitmap_end = 1; }
+			if (bi) { s.to_write--; bitmap_end = 1; }
 
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
@@ -2679,7 +2683,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				sh->dev[i].toread = NULL;
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
-				if (bi) to_read--;
+				if (bi) s.to_read--;
 				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
 					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 					clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2696,10 +2700,10 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 						STRIPE_SECTORS, 0, 0);
 		}
 	}
-	if (failed > 2 && syncing) {
+	if (s.failed > 2 && s.syncing) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
 		clear_bit(STRIPE_SYNCING, &sh->state);
-		syncing = 0;
+		s.syncing = 0;
 	}
 
 	/*
@@ -2707,13 +2711,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * are safe, or on a failed drive
 	 */
 	pdev = &sh->dev[pd_idx];
-	p_failed = (failed >= 1 && failed_num[0] == pd_idx)
-		|| (failed >= 2 && failed_num[1] == pd_idx);
+	p_failed = (s.failed >= 1 && s.r6_failed_num[0] == pd_idx)
+		|| (s.failed >= 2 && s.r6_failed_num[1] == pd_idx);
 	qdev = &sh->dev[qd_idx];
-	q_failed = (failed >= 1 && failed_num[0] == qd_idx)
-		|| (failed >= 2 && failed_num[1] == qd_idx);
+	q_failed = (s.failed >= 1 && s.r6_failed_num[0] == qd_idx)
+		|| (s.failed >= 2 && s.r6_failed_num[1] == qd_idx);
 
-	if ( written &&
+	if ( s.written &&
 	     ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
 			     && !test_bit(R5_LOCKED, &pdev->flags)
 			     && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
@@ -2762,28 +2766,28 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (to_write && failed) ||
-	    (syncing && (uptodate < disks)) || expanding) {
+	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
+	    (s.syncing && (s.uptodate < disks)) || s.expanding) {
 		for (i=disks; i--;) {
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 			    (dev->toread ||
 			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-			     syncing ||
-			     expanding ||
-			     (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
-			     (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
+			     s.syncing ||
+			     s.expanding ||
+			     (s.failed >= 1 && (sh->dev[s.r6_failed_num[0]].toread || s.to_write)) ||
+			     (s.failed >= 2 && (sh->dev[s.r6_failed_num[1]].toread || s.to_write))
 				    )
 				) {
 				/* we would like to get this block, possibly
 				 * by computing it, but we might not be able to
 				 */
-				if (uptodate == disks-1) {
+				if (s.uptodate == disks-1) {
 					PRINTK("Computing stripe %llu block %d\n",
 					       (unsigned long long)sh->sector, i);
 					compute_block_1(sh, i, 0);
-					uptodate++;
-				} else if ( uptodate == disks-2 && failed >= 2 ) {
+					s.uptodate++;
+				} else if ( s.uptodate == disks-2 && s.failed >= 2 ) {
 					/* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
 					int other;
 					for (other=disks; other--;) {
@@ -2796,13 +2800,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 					PRINTK("Computing stripe %llu blocks %d,%d\n",
 					       (unsigned long long)sh->sector, i, other);
 					compute_block_2(sh, i, other);
-					uptodate += 2;
+					s.uptodate += 2;
 				} else if (test_bit(R5_Insync, &dev->flags)) {
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
-					locked++;
+					s.locked++;
 					PRINTK("Reading block %d (sync=%d)\n",
-						i, syncing);
+						i, s.syncing);
 				}
 			}
 		}
@@ -2810,7 +2814,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	}
 
 	/* now to consider writing and what else, if anything should be read */
-	if (to_write) {
+	if (s.to_write) {
 		int rcw=0, must_compute=0;
 		for (i=disks ; i--;) {
 			dev = &sh->dev[i];
@@ -2836,7 +2840,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			for (i=disks; i--;) {
 				dev = &sh->dev[i];
 				if (!test_bit(R5_OVERWRITE, &dev->flags)
-				    && !(failed == 0 && (i == pd_idx || i == qd_idx))
+				    && !(s.failed == 0 && (i == pd_idx || i == qd_idx))
 				    && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 				    test_bit(R5_Insync, &dev->flags)) {
 					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
@@ -2845,7 +2849,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 						       (unsigned long long)sh->sector, i);
 						set_bit(R5_LOCKED, &dev->flags);
 						set_bit(R5_Wantread, &dev->flags);
-						locked++;
+						s.locked++;
 					} else {
 						PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
 						       (unsigned long long)sh->sector, i);
@@ -2855,14 +2859,14 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				}
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
-		if (locked == 0 && rcw == 0 &&
+		if (s.locked == 0 && rcw == 0 &&
 		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
 			if ( must_compute > 0 ) {
 				/* We have failed blocks and need to compute them */
-				switch ( failed ) {
+				switch ( s.failed ) {
 				case 0:	BUG();
-				case 1: compute_block_1(sh, failed_num[0], 0); break;
-				case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
+				case 1: compute_block_1(sh, s.r6_failed_num[0], 0); break;
+				case 2: compute_block_2(sh, s.r6_failed_num[0], s.r6_failed_num[1]); break;
 				default: BUG();	/* This request should have been failed? */
 				}
 			}
@@ -2874,7 +2878,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
 					PRINTK("Writing stripe %llu block %d\n",
 					       (unsigned long long)sh->sector, i);
-					locked++;
+					s.locked++;
 					set_bit(R5_Wantwrite, &sh->dev[i].flags);
 				}
 			/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
@@ -2892,14 +2896,14 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * Any reads will already have been scheduled, so we just see if enough data
 	 * is available
 	 */
-	if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
 		int update_p = 0, update_q = 0;
 		struct r5dev *dev;
 
 		set_bit(STRIPE_HANDLE, &sh->state);
 
-		BUG_ON(failed>2);
-		BUG_ON(uptodate < disks);
+		BUG_ON(s.failed>2);
+		BUG_ON(s.uptodate < disks);
 		/* Want to check and possibly repair P and Q.
 		 * However there could be one 'failed' device, in which
 		 * case we can only check one of them, possibly using the
@@ -2911,7 +2915,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		 * by stripe_handle with a tmp_page - just wait until then.
 		 */
 		if (tmp_page) {
-			if (failed == q_failed) {
+			if (s.failed == q_failed) {
 				/* The only possible failed device holds 'Q', so it makes
 				 * sense to check P (If anything else were failed, we would
 				 * have used P to recreate it).
@@ -2922,7 +2926,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 					update_p = 1;
 				}
 			}
-			if (!q_failed && failed < 2) {
+			if (!q_failed && s.failed < 2) {
 				/* q is not failed, and we didn't use it to generate
 				 * anything, so it makes sense to check it
 				 */
@@ -2948,28 +2952,28 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			 * or P or Q if they need it
 			 */
 
-			if (failed == 2) {
-				dev = &sh->dev[failed_num[1]];
-				locked++;
+			if (s.failed == 2) {
+				dev = &sh->dev[s.r6_failed_num[1]];
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
-			if (failed >= 1) {
-				dev = &sh->dev[failed_num[0]];
-				locked++;
+			if (s.failed >= 1) {
+				dev = &sh->dev[s.r6_failed_num[0]];
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
 
 			if (update_p) {
 				dev = &sh->dev[pd_idx];
-				locked ++;
+				s.locked ++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
 			if (update_q) {
 				dev = &sh->dev[qd_idx];
-				locked++;
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
@@ -2979,7 +2983,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 	}
 
-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 	}
@@ -2987,9 +2991,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	/* If the failed drives are just a ReadError, then we might need
 	 * to progress the repair/check process
 	 */
-	if (failed <= 2 && ! conf->mddev->ro)
-		for (i=0; i<failed;i++) {
-			dev = &sh->dev[failed_num[i]];
+	if (s.failed <= 2 && ! conf->mddev->ro)
+		for (i=0; i<s.failed;i++) {
+			dev = &sh->dev[s.r6_failed_num[i]];
 			if (test_bit(R5_ReadError, &dev->flags)
 			    && !test_bit(R5_LOCKED, &dev->flags)
 			    && test_bit(R5_UPTODATE, &dev->flags)
@@ -3006,7 +3010,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			}
 		}
 
-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
 		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
@@ -3014,18 +3018,18 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		compute_parity6(sh, RECONSTRUCT_WRITE);
 		for (i = conf->raid_disks ; i-- ;  ) {
 			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			locked++;
+			s.locked++;
 			set_bit(R5_Wantwrite, &sh->dev[i].flags);
 		}
 		clear_bit(STRIPE_EXPANDING, &sh->state);
-	} else if (expanded) {
+	} else if (s.expanded) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 	}
 
-	if (expanding && locked == 0) {
+	if (s.expanding && s.locked == 0) {
 		/* We have read all the blocks in this stripe and now we need to
 		 * copy some of them into a target stripe for expand.
 		 */
@@ -3118,7 +3122,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		rcu_read_unlock();
 
 		if (rdev) {
-			if (syncing || expanding || expanded)
+			if (s.syncing || s.expanding || s.expanded)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 3541d2c..54e2aa2 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -182,6 +182,17 @@ struct stripe_head {
 		unsigned long	flags;
 	} dev[1]; /* allocated with extra space depending of RAID geometry */
 };
+
+struct stripe_head_state { /* per-call scratch state for handle_stripe5/6 */
+	int syncing, expanding, expanded; /* sampled from sh->state bits */
+	int locked, uptodate, to_read, to_write, failed, written; /* counts */
+	int to_fill, compute, req_compute, non_overwrite, dirty;
+	union { /* the two members below share storage */
+		int failed_num; /* handle_stripe5: failed/repair dev index */
+		int r6_failed_num[2]; /* handle_stripe6: up to 2 failed devs */
+	};
+};
+
 /* Flags */
 #define	R5_UPTODATE	0	/* page contains current data */
 #define	R5_LOCKED	1	/* IO has been submitted on "req" */

  reply	other threads:[~2007-04-11  6:00 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-11  6:00 [PATCH RFC 0/4] raid5: write-back caching policy and write performance Dan Williams
2007-04-11  6:00 ` Dan Williams [this message]
2007-04-11  6:00 ` [PATCH RFC 2/4] md: refactor raid5 cache policy code using 'struct stripe_cache_policy' Dan Williams
2007-04-11  6:00 ` [PATCH RFC 3/4] md: writeback caching policy for raid5 [experimental] Dan Williams
2007-04-11 22:40   ` Mark Hahn
2007-04-12  0:08     ` Williams, Dan J
2007-04-12  6:21       ` Neil Brown
2007-04-12  5:37   ` Al Boldi
2007-04-11  6:00 ` [PATCH RFC 4/4] md: delayed stripe activation Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070411060026.15745.84714.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.