[PATCH RFC 1/4] md: introduce struct stripe_head_state

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Dan Williams <dan.j.williams@intel.com>
To: linux-raid@vger.kernel.org
Subject: [PATCH RFC 1/4] md: introduce struct stripe_head_state
Date: Tue, 10 Apr 2007 23:00:26 -0700	[thread overview]
Message-ID: <20070411060026.15745.84714.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070411055729.15745.51513.stgit@dwillia2-linux.ch.intel.com>

struct stripe_head_state collects all the dynamic stripe-state information
that is calculated and tracked during a call to handle_stripe5() or
handle_stripe6().  Keeping this state in one structure, rather than in
separate local variables, allows handle_stripe functionality to be broken
off into subroutines.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c         |  280 ++++++++++++++++++++++----------------------
 include/linux/raid/raid5.h |   11 ++
 2 files changed, 153 insertions(+), 138 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 74ce354..684552a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1872,12 +1872,14 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
-	int syncing, expanding, expanded;
-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
-	int to_fill=0, compute=0, req_compute=0, non_overwrite=0;
-	int failed_num=0;
+	struct stripe_head_state s = {
+		.locked=0, .uptodate=0, .to_read=0, .to_write=0, .failed=0,
+		.written=0, .to_fill=0, .compute=0, .req_compute=0,
+		.non_overwrite=0,
+	};
 	struct r5dev *dev;
 	unsigned long pending=0;
+	s.failed_num=0;
 
 	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d ops=%lx:%lx:%lx\n",
 	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
@@ -1887,9 +1889,9 @@ static void handle_stripe5(struct stripe_head *sh)
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
 	rcu_read_lock();
@@ -1911,22 +1913,22 @@ static void handle_stripe5(struct stripe_head *sh)
 			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
+		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
 
 		if (test_bit(R5_Wantfill, &dev->flags))
-			to_fill++;
+			s.to_fill++;
 		else if (dev->toread)
-			to_read++;
+			s.to_read++;
 
-		if (test_bit(R5_Wantcompute, &dev->flags)) BUG_ON(++compute > 1);
+		if (test_bit(R5_Wantcompute, &dev->flags)) BUG_ON(++s.compute > 1);
 
 		if (dev->towrite) {
-			to_write++;
+			s.to_write++;
 			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				non_overwrite++;
+				s.non_overwrite++;
 		}
-		if (dev->written) written++;
+		if (dev->written) s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
@@ -1935,23 +1937,24 @@ static void handle_stripe5(struct stripe_head *sh)
 		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)
 		    || test_bit(R5_ReadError, &dev->flags)) {
-			failed++;
-			failed_num = i;
+			s.failed++;
+			s.failed_num = i;
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
 	rcu_read_unlock();
 
-	if (to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
+	if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
 		sh->ops.count++;
 
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 		" to_write=%d to_fill=%d failed=%d failed_num=%d\n",
-		locked, uptodate, to_read, to_write, to_fill, failed, failed_num);
+		s.locked, s.uptodate, s.to_read, s.to_write, s.to_fill,
+		s.failed, s.failed_num);
 	/* check if the array has lost two devices and, if so, some requests might
 	 * need to be failed
 	 */
-	if (failed > 1 && to_read+to_write+written) {
+	if (s.failed > 1 && s.to_read+s.to_write+s.written) {
 		for (i=disks; i--; ) {
 			int bitmap_end = 0;
 
@@ -1969,7 +1972,7 @@ static void handle_stripe5(struct stripe_head *sh)
 			/* fail all writes first */
 			bi = sh->dev[i].towrite;
 			sh->dev[i].towrite = NULL;
-			if (bi) { to_write--; bitmap_end = 1; }
+			if (bi) { s.to_write--; bitmap_end = 1; }
 
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
@@ -2009,7 +2012,7 @@ static void handle_stripe5(struct stripe_head *sh)
 				sh->dev[i].toread = NULL;
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
-				if (bi) to_read--;
+				if (bi) s.to_read--;
 				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
 					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 					clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2026,20 +2029,20 @@ static void handle_stripe5(struct stripe_head *sh)
 						STRIPE_SECTORS, 0, 0);
 		}
 	}
-	if (failed > 1 && syncing) {
+	if (s.failed > 1 && s.syncing) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
 		clear_bit(STRIPE_SYNCING, &sh->state);
-		syncing = 0;
+		s.syncing = 0;
 	}
 
 	/* might be able to return some write requests if the parity block
 	 * is safe, or on a failed drive
 	 */
 	dev = &sh->dev[sh->pd_idx];
-	if ( written &&
+	if ( s.written &&
 	     ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
 		test_bit(R5_UPTODATE, &dev->flags))
-	       || (failed == 1 && failed_num == sh->pd_idx))
+	       || (s.failed == 1 && s.failed_num == sh->pd_idx))
 	    ) {
 	    /* any written block on an uptodate or failed drive can be returned.
 	     * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 
@@ -2081,8 +2084,8 @@ static void handle_stripe5(struct stripe_head *sh)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (syncing && (uptodate + compute < disks)) || expanding ||
-		test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
+	if (s.to_read || s.non_overwrite || (s.syncing && (s.uptodate + s.compute < disks)) ||
+		s.expanding || test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 
 		/* Clear completed compute operations.  Parity recovery
 		 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
@@ -2114,11 +2117,11 @@ static void handle_stripe5(struct stripe_head *sh)
 				if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 				     (dev->toread ||
 				     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-				     syncing ||
-				     expanding ||
-				     (failed && (sh->dev[failed_num].toread ||
-						 (sh->dev[failed_num].towrite &&
-						 	!test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
+				     s.syncing ||
+				     s.expanding ||
+				     (s.failed && (sh->dev[s.failed_num].toread ||
+						 (sh->dev[s.failed_num].towrite &&
+						 	!test_bit(R5_OVERWRITE, &sh->dev[s.failed_num].flags))))
 					    )
 					) {
 					/* 1/ We would like to get this block, possibly
@@ -2132,20 +2135,20 @@ static void handle_stripe5(struct stripe_head *sh)
 					 * 3/ We hold off parity block re-reads until check
 					 * operations have quiesced.
 					 */
-					if ((uptodate == disks-1) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+					if ((s.uptodate == disks-1) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 						set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
 						set_bit(R5_Wantcompute, &dev->flags);
 						sh->ops.target = i;
-						req_compute = 1;
+						s.req_compute = 1;
 						sh->ops.count++;
 						/* Careful: from this point on 'uptodate' is in the eye of
 						 * raid5_run_ops which services 'compute' operations before
 						 * writes. R5_Wantcompute flags a block that will be R5_UPTODATE
 						 * by the time it is needed for a subsequent operation.
 						 */
-						uptodate++;
+						s.uptodate++;
 						break; /* uptodate + compute == disks */
-					} else if ((uptodate < disks-1) && test_bit(R5_Insync, &dev->flags)) {
+					} else if ((s.uptodate < disks-1) && test_bit(R5_Insync, &dev->flags)) {
 						/* Note: we hold off compute operations while checks are in flight,
 						 * but we still prefer 'compute' over 'read' hence we only read if
 						 * (uptodate < disks-1)
@@ -2154,9 +2157,9 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 						PRINTK("Reading block %d (sync=%d)\n",
-							i, syncing);
+							i, s.syncing);
 					}
 				}
 			}
@@ -2207,7 +2210,7 @@ static void handle_stripe5(struct stripe_head *sh)
 				if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 					sh->ops.count++;
 				if (!test_bit(R5_Insync, &dev->flags)
-				    || (i==sh->pd_idx && failed == 0))
+				    || (i==sh->pd_idx && s.failed == 0))
 					set_bit(STRIPE_INSYNC, &sh->state);
 			}
 		}
@@ -2223,7 +2226,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	 *    a check is in flight
 	 * 3/ Write operations do not stack
 	 */
-	if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+	if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
 		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 		int rmw=0, rcw=0;
 		for (i=disks ; i--;) {
@@ -2266,7 +2269,7 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 					} else {
 						set_bit(STRIPE_DELAYED, &sh->state);
 						set_bit(STRIPE_HANDLE, &sh->state);
@@ -2288,7 +2291,7 @@ static void handle_stripe5(struct stripe_head *sh)
 						set_bit(R5_Wantread, &dev->flags);
 						if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 							sh->ops.count++;
-						locked++;
+						s.locked++;
 					} else {
 						set_bit(STRIPE_DELAYED, &sh->state);
 						set_bit(STRIPE_HANDLE, &sh->state);
@@ -2303,10 +2306,10 @@ static void handle_stripe5(struct stripe_head *sh)
 		 * is not the case then new writes need to be held off until the compute
 		 * completes.
 		 */
-		if ((req_compute || !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
-			(locked == 0 && (rcw == 0 ||rmw == 0) &&
+		if ((s.req_compute || !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
+			(s.locked == 0 && (rcw == 0 ||rmw == 0) &&
 			!test_bit(STRIPE_BIT_DELAY, &sh->state)))
-			locked += handle_write_operations5(sh, rcw == 0, 0);
+			s.locked += handle_write_operations5(sh, rcw == 0, 0);
 	}
 
 	/* 1/ Maybe we need to check and possibly fix the parity for this stripe.
@@ -2315,7 +2318,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	 * 2/ Hold off parity checks while parity dependent operations are in flight
 	 *    (conflicting writes are protected by the 'locked' variable)
 	 */
-	if ((syncing && locked == 0 && !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
+	if ((s.syncing && s.locked == 0 && !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
 		!test_bit(STRIPE_INSYNC, &sh->state)) ||
 	    	test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
 	    	test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
@@ -2327,12 +2330,12 @@ static void handle_stripe5(struct stripe_head *sh)
 		 * 3/ skip to the writeback section if we previously
 		 *    initiated a recovery operation
 		 */
-		if (failed == 0 && !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+		if (s.failed == 0 && !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
 			if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
-				BUG_ON(uptodate != disks);
+				BUG_ON(s.uptodate != disks);
 				clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
 				sh->ops.count++;
-				uptodate--;
+				s.uptodate--;
 			} else if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
 				clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
 				clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
@@ -2354,7 +2357,7 @@ static void handle_stripe5(struct stripe_head *sh)
 							&sh->dev[sh->pd_idx].flags);
 						sh->ops.target = sh->pd_idx;
 						sh->ops.count++;
-						uptodate++;
+						s.uptodate++;
 					}
 				}
 			}
@@ -2378,22 +2381,22 @@ static void handle_stripe5(struct stripe_head *sh)
 			!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 
 			/* either failed parity check, or recovery is happening */
-			if (failed==0)
-				failed_num = sh->pd_idx;
-			dev = &sh->dev[failed_num];
+			if (s.failed==0)
+				s.failed_num = sh->pd_idx;
+			dev = &sh->dev[s.failed_num];
 			BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
-			BUG_ON(uptodate != disks);
+			BUG_ON(s.uptodate != disks);
 
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			clear_bit(STRIPE_DEGRADED, &sh->state);
-			locked++;
+			s.locked++;
 			set_bit(STRIPE_INSYNC, &sh->state);
 		}
 	}
-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 	}
@@ -2401,26 +2404,26 @@ static void handle_stripe5(struct stripe_head *sh)
 	/* If the failed drive is just a ReadError, then we might need to progress
 	 * the repair/check process
 	 */
-	if (failed == 1 && ! conf->mddev->ro &&
-	    test_bit(R5_ReadError, &sh->dev[failed_num].flags)
-	    && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
-	    && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
+	if (s.failed == 1 && ! conf->mddev->ro &&
+	    test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
+	    && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
+	    && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
 		) {
-		dev = &sh->dev[failed_num];
+		dev = &sh->dev[s.failed_num];
 		if (!test_bit(R5_ReWrite, &dev->flags)) {
 			set_bit(R5_Wantwrite, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			set_bit(R5_ReWrite, &dev->flags);
 			set_bit(R5_LOCKED, &dev->flags);
-			locked++;
+			s.locked++;
 		} else {
 			/* let's read it back */
 			set_bit(R5_Wantread, &dev->flags);
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 			set_bit(R5_LOCKED, &dev->flags);
-			locked++;
+			s.locked++;
 		}
 	}
 
@@ -2443,20 +2446,20 @@ static void handle_stripe5(struct stripe_head *sh)
 		}
 	}
 
-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
 		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
 		/* Need to write out all blocks after computing parity */
 		sh->disks = conf->raid_disks;
 		sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
-		locked += handle_write_operations5(sh, 0, 1);
-	} else if (expanded && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+		s.locked += handle_write_operations5(sh, 0, 1);
+	} else if (s.expanded && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 	}
 
-	if (expanding && locked == 0) {
+	if (s.expanding && s.locked == 0) {
 		/* We have read all the blocks in this stripe and now we need to
 		 * copy some of them into a target stripe for expand.
 		 */
@@ -2537,14 +2540,15 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
-	int syncing, expanding, expanded;
-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
-	int non_overwrite = 0;
-	int failed_num[2] = {0, 0};
+	struct stripe_head_state s = {
+		.locked=0, .uptodate=0, .to_read=0, .to_write=0, .failed=0,
+		.written=0, .non_overwrite = 0,
+	};
 	struct r5dev *dev, *pdev, *qdev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = raid6_next_disk(pd_idx, disks);
 	int p_failed, q_failed;
+	s.r6_failed_num[0] = s.r6_failed_num[1] = 0;
 
 	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
 	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
@@ -2554,9 +2558,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
 	rcu_read_lock();
@@ -2591,17 +2595,17 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 
 		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
+		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
 
 
-		if (dev->toread) to_read++;
+		if (dev->toread) s.to_read++;
 		if (dev->towrite) {
-			to_write++;
+			s.to_write++;
 			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				non_overwrite++;
+				s.non_overwrite++;
 		}
-		if (dev->written) written++;
+		if (dev->written) s.written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
@@ -2610,21 +2614,21 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 		if (!rdev || !test_bit(In_sync, &rdev->flags)
 		    || test_bit(R5_ReadError, &dev->flags)) {
-			if ( failed < 2 )
-				failed_num[failed] = i;
-			failed++;
+			if ( s.failed < 2 )
+				s.r6_failed_num[s.failed] = i;
+			s.failed++;
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
 	rcu_read_unlock();
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
-	       locked, uptodate, to_read, to_write, failed,
-	       failed_num[0], failed_num[1]);
+	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
+	       s.r6_failed_num[0], s.r6_failed_num[1]);
 	/* check if the array has lost >2 devices and, if so, some requests might
 	 * need to be failed
 	 */
-	if (failed > 2 && to_read+to_write+written) {
+	if (s.failed > 2 && s.to_read+s.to_write+s.written) {
 		for (i=disks; i--; ) {
 			int bitmap_end = 0;
 
@@ -2642,7 +2646,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			/* fail all writes first */
 			bi = sh->dev[i].towrite;
 			sh->dev[i].towrite = NULL;
-			if (bi) { to_write--; bitmap_end = 1; }
+			if (bi) { s.to_write--; bitmap_end = 1; }
 
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
@@ -2679,7 +2683,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				sh->dev[i].toread = NULL;
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
-				if (bi) to_read--;
+				if (bi) s.to_read--;
 				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
 					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 					clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2696,10 +2700,10 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 						STRIPE_SECTORS, 0, 0);
 		}
 	}
-	if (failed > 2 && syncing) {
+	if (s.failed > 2 && s.syncing) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
 		clear_bit(STRIPE_SYNCING, &sh->state);
-		syncing = 0;
+		s.syncing = 0;
 	}
 
 	/*
@@ -2707,13 +2711,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * are safe, or on a failed drive
 	 */
 	pdev = &sh->dev[pd_idx];
-	p_failed = (failed >= 1 && failed_num[0] == pd_idx)
-		|| (failed >= 2 && failed_num[1] == pd_idx);
+	p_failed = (s.failed >= 1 && s.r6_failed_num[0] == pd_idx)
+		|| (s.failed >= 2 && s.r6_failed_num[1] == pd_idx);
 	qdev = &sh->dev[qd_idx];
-	q_failed = (failed >= 1 && failed_num[0] == qd_idx)
-		|| (failed >= 2 && failed_num[1] == qd_idx);
+	q_failed = (s.failed >= 1 && s.r6_failed_num[0] == qd_idx)
+		|| (s.failed >= 2 && s.r6_failed_num[1] == qd_idx);
 
-	if ( written &&
+	if ( s.written &&
 	     ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
 			     && !test_bit(R5_LOCKED, &pdev->flags)
 			     && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
@@ -2762,28 +2766,28 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (to_write && failed) ||
-	    (syncing && (uptodate < disks)) || expanding) {
+	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
+	    (s.syncing && (s.uptodate < disks)) || s.expanding) {
 		for (i=disks; i--;) {
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 			    (dev->toread ||
 			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-			     syncing ||
-			     expanding ||
-			     (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
-			     (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
+			     s.syncing ||
+			     s.expanding ||
+			     (s.failed >= 1 && (sh->dev[s.r6_failed_num[0]].toread || s.to_write)) ||
+			     (s.failed >= 2 && (sh->dev[s.r6_failed_num[1]].toread || s.to_write))
 				    )
 				) {
 				/* we would like to get this block, possibly
 				 * by computing it, but we might not be able to
 				 */
-				if (uptodate == disks-1) {
+				if (s.uptodate == disks-1) {
 					PRINTK("Computing stripe %llu block %d\n",
 					       (unsigned long long)sh->sector, i);
 					compute_block_1(sh, i, 0);
-					uptodate++;
-				} else if ( uptodate == disks-2 && failed >= 2 ) {
+					s.uptodate++;
+				} else if ( s.uptodate == disks-2 && s.failed >= 2 ) {
 					/* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
 					int other;
 					for (other=disks; other--;) {
@@ -2796,13 +2800,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 					PRINTK("Computing stripe %llu blocks %d,%d\n",
 					       (unsigned long long)sh->sector, i, other);
 					compute_block_2(sh, i, other);
-					uptodate += 2;
+					s.uptodate += 2;
 				} else if (test_bit(R5_Insync, &dev->flags)) {
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
-					locked++;
+					s.locked++;
 					PRINTK("Reading block %d (sync=%d)\n",
-						i, syncing);
+						i, s.syncing);
 				}
 			}
 		}
@@ -2810,7 +2814,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	}
 
 	/* now to consider writing and what else, if anything should be read */
-	if (to_write) {
+	if (s.to_write) {
 		int rcw=0, must_compute=0;
 		for (i=disks ; i--;) {
 			dev = &sh->dev[i];
@@ -2836,7 +2840,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			for (i=disks; i--;) {
 				dev = &sh->dev[i];
 				if (!test_bit(R5_OVERWRITE, &dev->flags)
-				    && !(failed == 0 && (i == pd_idx || i == qd_idx))
+				    && !(s.failed == 0 && (i == pd_idx || i == qd_idx))
 				    && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 				    test_bit(R5_Insync, &dev->flags)) {
 					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
@@ -2845,7 +2849,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 						       (unsigned long long)sh->sector, i);
 						set_bit(R5_LOCKED, &dev->flags);
 						set_bit(R5_Wantread, &dev->flags);
-						locked++;
+						s.locked++;
 					} else {
 						PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
 						       (unsigned long long)sh->sector, i);
@@ -2855,14 +2859,14 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				}
 			}
 		/* now if nothing is locked, and if we have enough data, we can start a write request */
-		if (locked == 0 && rcw == 0 &&
+		if (s.locked == 0 && rcw == 0 &&
 		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
 			if ( must_compute > 0 ) {
 				/* We have failed blocks and need to compute them */
-				switch ( failed ) {
+				switch ( s.failed ) {
 				case 0:	BUG();
-				case 1: compute_block_1(sh, failed_num[0], 0); break;
-				case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
+				case 1: compute_block_1(sh, s.r6_failed_num[0], 0); break;
+				case 2: compute_block_2(sh, s.r6_failed_num[0], s.r6_failed_num[1]); break;
 				default: BUG();	/* This request should have been failed? */
 				}
 			}
@@ -2874,7 +2878,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
 					PRINTK("Writing stripe %llu block %d\n",
 					       (unsigned long long)sh->sector, i);
-					locked++;
+					s.locked++;
 					set_bit(R5_Wantwrite, &sh->dev[i].flags);
 				}
 			/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
@@ -2892,14 +2896,14 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * Any reads will already have been scheduled, so we just see if enough data
 	 * is available
 	 */
-	if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
 		int update_p = 0, update_q = 0;
 		struct r5dev *dev;
 
 		set_bit(STRIPE_HANDLE, &sh->state);
 
-		BUG_ON(failed>2);
-		BUG_ON(uptodate < disks);
+		BUG_ON(s.failed>2);
+		BUG_ON(s.uptodate < disks);
 		/* Want to check and possibly repair P and Q.
 		 * However there could be one 'failed' device, in which
 		 * case we can only check one of them, possibly using the
@@ -2911,7 +2915,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		 * by stripe_handle with a tmp_page - just wait until then.
 		 */
 		if (tmp_page) {
-			if (failed == q_failed) {
+			if (s.failed == q_failed) {
 				/* The only possible failed device holds 'Q', so it makes
 				 * sense to check P (If anything else were failed, we would
 				 * have used P to recreate it).
@@ -2922,7 +2926,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 					update_p = 1;
 				}
 			}
-			if (!q_failed && failed < 2) {
+			if (!q_failed && s.failed < 2) {
 				/* q is not failed, and we didn't use it to generate
 				 * anything, so it makes sense to check it
 				 */
@@ -2948,28 +2952,28 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			 * or P or Q if they need it
 			 */
 
-			if (failed == 2) {
-				dev = &sh->dev[failed_num[1]];
-				locked++;
+			if (s.failed == 2) {
+				dev = &sh->dev[s.r6_failed_num[1]];
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
-			if (failed >= 1) {
-				dev = &sh->dev[failed_num[0]];
-				locked++;
+			if (s.failed >= 1) {
+				dev = &sh->dev[s.r6_failed_num[0]];
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
 
 			if (update_p) {
 				dev = &sh->dev[pd_idx];
-				locked ++;
+				s.locked ++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
 			if (update_q) {
 				dev = &sh->dev[qd_idx];
-				locked++;
+				s.locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
 			}
@@ -2979,7 +2983,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		}
 	}
 
-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 	}
@@ -2987,9 +2991,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	/* If the failed drives are just a ReadError, then we might need
 	 * to progress the repair/check process
 	 */
-	if (failed <= 2 && ! conf->mddev->ro)
-		for (i=0; i<failed;i++) {
-			dev = &sh->dev[failed_num[i]];
+	if (s.failed <= 2 && ! conf->mddev->ro)
+		for (i=0; i<s.failed;i++) {
+			dev = &sh->dev[s.r6_failed_num[i]];
 			if (test_bit(R5_ReadError, &dev->flags)
 			    && !test_bit(R5_LOCKED, &dev->flags)
 			    && test_bit(R5_UPTODATE, &dev->flags)
@@ -3006,7 +3010,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 			}
 		}
 
-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
 		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
@@ -3014,18 +3018,18 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		compute_parity6(sh, RECONSTRUCT_WRITE);
 		for (i = conf->raid_disks ; i-- ;  ) {
 			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			locked++;
+			s.locked++;
 			set_bit(R5_Wantwrite, &sh->dev[i].flags);
 		}
 		clear_bit(STRIPE_EXPANDING, &sh->state);
-	} else if (expanded) {
+	} else if (s.expanded) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 	}
 
-	if (expanding && locked == 0) {
+	if (s.expanding && s.locked == 0) {
 		/* We have read all the blocks in this stripe and now we need to
 		 * copy some of them into a target stripe for expand.
 		 */
@@ -3118,7 +3122,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		rcu_read_unlock();
 
 		if (rdev) {
-			if (syncing || expanding || expanded)
+			if (s.syncing || s.expanding || s.expanded)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 3541d2c..54e2aa2 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -182,6 +182,17 @@ struct stripe_head {
 		unsigned long	flags;
 	} dev[1]; /* allocated with extra space depending of RAID geometry */
 };
+
+struct stripe_head_state {
+	int syncing, expanding, expanded;
+	int locked, uptodate, to_read, to_write, failed, written;
+	int to_fill, compute, req_compute, non_overwrite, dirty;
+	union {
+		int failed_num;
+		int r6_failed_num[2];
+	};
+};
+
 /* Flags */
 #define	R5_UPTODATE	0	/* page contains current data */
 #define	R5_LOCKED	1	/* IO has been submitted on "req" */

next prev parent reply	other threads:[~2007-04-11  6:00 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-11  6:00 [PATCH RFC 0/4] raid5: write-back caching policy and write performance Dan Williams
2007-04-11  6:00 ` Dan Williams [this message]
2007-04-11  6:00 ` [PATCH RFC 2/4] md: refactor raid5 cache policy code using 'struct stripe_cache_policy' Dan Williams
2007-04-11  6:00 ` [PATCH RFC 3/4] md: writeback caching policy for raid5 [experimental] Dan Williams
2007-04-11 22:40   ` Mark Hahn
2007-04-12  0:08     ` Williams, Dan J
2007-04-12  6:21       ` Neil Brown
2007-04-12  5:37   ` Al Boldi
2007-04-11  6:00 ` [PATCH RFC 4/4] md: delayed stripe activation Dan Williams

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:74ce354 dfblob:684552a dfblob:3541d2c dfblob:54e2aa2 )
 OR (
bs:"[PATCH RFC 1/4] md: introduce struct stripe_head_state" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070411060026.15745.84714.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).