From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, akpm@linux-foundation.org, christopher.leech@intel.com
Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org
Subject: [PATCH 07/16] md: move write operations to raid5_run_ops
Date: Tue, 01 May 2007 23:18:11 -0700 [thread overview]
Message-ID: <20070502061811.7066.16213.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070502060949.7066.357.stgit@dwillia2-linux.ch.intel.com>
handle_stripe sets STRIPE_OP_PREXOR, STRIPE_OP_BIODRAIN, STRIPE_OP_POSTXOR
to request a write to the stripe cache. raid5_run_ops is triggered to run
and executes the request outside the stripe lock.
Changelog:
* make the 'rcw' parameter to handle_write_operations5 a simple flag, Neil
Brown
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/md/raid5.c | 151 +++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 130 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 14e9f6a..03a435d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1807,7 +1807,74 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
}
}
+static int handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+{
+ int i, pd_idx = sh->pd_idx, disks = sh->disks;
+ int locked=0;
+
+ if (rcw) {
+ /* skip the drain operation on an expand */
+ if (!expand) {
+ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+ sh->ops.count++;
+ }
+
+ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+ sh->ops.count++;
+
+ for (i=disks ; i-- ;) {
+ struct r5dev *dev = &sh->dev[i];
+
+ if (dev->towrite) {
+ set_bit(R5_LOCKED, &dev->flags);
+ if (!expand)
+ clear_bit(R5_UPTODATE, &dev->flags);
+ locked++;
+ }
+ }
+ } else {
+ BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
+ test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+
+ set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+ sh->ops.count += 3;
+
+ for (i=disks ; i-- ;) {
+ struct r5dev *dev = &sh->dev[i];
+ if (i==pd_idx)
+ continue;
+ /* For a read-modify write there may be blocks that are
+ * locked for reading while others are ready to be written
+ * so we distinguish these blocks by the R5_Wantprexor bit
+ */
+ if (dev->towrite &&
+ (test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ set_bit(R5_Wantprexor, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ clear_bit(R5_UPTODATE, &dev->flags);
+ locked++;
+ }
+ }
+ }
+
+ /* keep the parity disk locked while asynchronous operations
+ * are in flight
+ */
+ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ locked++;
+
+ PRINTK("%s: stripe %llu locked: %d pending: %lx\n",
+ __FUNCTION__, (unsigned long long)sh->sector,
+ locked, sh->ops.pending);
+
+ return locked;
+}
/*
* Each stripe/dev can have one or more bion attached.
@@ -2170,8 +2237,67 @@ static void handle_stripe5(struct stripe_head *sh)
set_bit(STRIPE_HANDLE, &sh->state);
}
- /* now to consider writing and what else, if anything should be read */
- if (to_write) {
+ /* Now we check to see if any write operations have recently
+ * completed
+ */
+
+ /* leave prexor set until postxor is done, allows us to distinguish
+ * a rmw from a rcw during biodrain
+ */
+ if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
+ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+
+ for (i=disks; i--;)
+ clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+ }
+
+ /* if only POSTXOR is set then this is an 'expand' postxor */
+ if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
+ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+ /* All the 'written' buffers and the parity block are ready to be
+ * written back to disk
+ */
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ for (i=disks; i--;) {
+ dev = &sh->dev[i];
+ if (test_bit(R5_LOCKED, &dev->flags) &&
+ (i == sh->pd_idx || dev->written)) {
+ PRINTK("Writing block %d\n", i);
+ set_bit(R5_Wantwrite, &dev->flags);
+ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+ sh->ops.count++;
+ if (!test_bit(R5_Insync, &dev->flags)
+ || (i==sh->pd_idx && failed == 0))
+ set_bit(STRIPE_INSYNC, &sh->state);
+ }
+ }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+
+ /* 1/ Now to consider new write requests and what else, if anything should be read
+ * 2/ Check operations clobber the parity block so do not start new writes while
+ * a check is in flight
+ * 3/ Write operations do not stack
+ */
+ if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
int rmw=0, rcw=0;
for (i=disks ; i--;) {
/* would I have to read this buffer for read_modify_write */
@@ -2238,25 +2364,8 @@ static void handle_stripe5(struct stripe_head *sh)
}
/* now if nothing is locked, and if we have enough data, we can start a write request */
if (locked == 0 && (rcw == 0 ||rmw == 0) &&
- !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- PRINTK("Computing parity...\n");
- compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
- /* now every locked buffer is ready to be written */
- for (i=disks; i--;)
- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- PRINTK("Writing block %d\n", i);
- locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- if (!test_bit(R5_Insync, &sh->dev[i].flags)
- || (i==sh->pd_idx && failed == 0))
- set_bit(STRIPE_INSYNC, &sh->state);
- }
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
- }
+ !test_bit(STRIPE_BIT_DELAY, &sh->state))
+ locked += handle_write_operations5(sh, rcw == 0, 0);
}
/* maybe we need to check and possibly fix the parity for this stripe
next prev parent reply other threads:[~2007-05-02 6:18 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-02 6:14 [PATCH 00/16] raid acceleration and asynchronous offload api for 2.6.22 Dan Williams
2007-05-02 6:15 ` [PATCH 01/16] dmaengine: add base support for the async_tx api Dan Williams
2007-05-02 6:15 ` [PATCH 02/16] dmaengine: move channel management to the client Dan Williams
2007-05-02 6:16 ` [PATCH 03/16] ARM: Add drivers/dma to arch/arm/Kconfig Dan Williams
2007-05-02 6:16 ` [PATCH 04/16] dmaengine: add the async_tx api Dan Williams
2007-05-02 6:17 ` [PATCH 05/16] md: add raid5_run_ops and support routines Dan Williams
2007-05-02 6:17 ` [PATCH 06/16] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-05-02 6:18 ` Dan Williams [this message]
2007-05-02 6:18 ` [PATCH 08/16] md: move raid5 compute block operations to raid5_run_ops Dan Williams
2007-05-02 6:19 ` [PATCH 09/16] md: move raid5 parity checks " Dan Williams
2007-05-02 6:19 ` [PATCH 10/16] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-05-02 6:20 ` [PATCH 11/16] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-05-02 6:20 ` [PATCH 12/16] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-05-02 6:21 ` [PATCH 13/16] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-05-02 6:21 ` [PATCH 14/16] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2007-05-02 6:22 ` [PATCH 15/16] iop13xx: Surface the iop13xx adma units to the iop-adma driver Dan Williams
2007-05-02 6:22 ` [PATCH 16/16] iop3xx: Surface the iop3xx DMA and AAU " Dan Williams
2007-05-02 6:55 ` [PATCH 00/16] raid acceleration and asynchronous offload api for 2.6.22 Nick Piggin
2007-05-02 15:45 ` Williams, Dan J
2007-05-02 15:55 ` Justin Piszcz
2007-05-02 16:17 ` Williams, Dan J
2007-05-02 16:19 ` Justin Piszcz
2007-05-02 16:36 ` Dagfinn Ilmari Mannsåker
2007-05-02 16:42 ` Williams, Dan J
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070502061811.7066.16213.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=akpm@linux-foundation.org \
--cc=christopher.leech@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).