From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, akpm@linux-foundation.org, christopher.leech@intel.com
Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org
Subject: [PATCH 07/16] md: move write operations to raid5_run_ops
Date: Tue, 01 May 2007 23:18:11 -0700 [thread overview]
Message-ID: <20070502061811.7066.16213.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070502060949.7066.357.stgit@dwillia2-linux.ch.intel.com>
handle_stripe sets STRIPE_OP_PREXOR, STRIPE_OP_BIODRAIN, STRIPE_OP_POSTXOR
to request a write to the stripe cache. raid5_run_ops is triggered to run
and executes the request outside the stripe lock.
Changelog:
* make the 'rcw' parameter to handle_write_operations5 a simple flag, Neil
Brown
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/md/raid5.c | 151 +++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 130 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 14e9f6a..03a435d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1807,7 +1807,74 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
}
}
+static int handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+{
+ int i, pd_idx = sh->pd_idx, disks = sh->disks;
+ int locked=0;
+
+ if (rcw) {
+ /* skip the drain operation on an expand */
+ if (!expand) {
+ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+ sh->ops.count++;
+ }
+
+ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+ sh->ops.count++;
+
+ for (i=disks ; i-- ;) {
+ struct r5dev *dev = &sh->dev[i];
+
+ if (dev->towrite) {
+ set_bit(R5_LOCKED, &dev->flags);
+ if (!expand)
+ clear_bit(R5_UPTODATE, &dev->flags);
+ locked++;
+ }
+ }
+ } else {
+ BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
+ test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+
+ set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+ sh->ops.count += 3;
+
+ for (i=disks ; i-- ;) {
+ struct r5dev *dev = &sh->dev[i];
+ if (i==pd_idx)
+ continue;
+ /* For a read-modify write there may be blocks that are
+ * locked for reading while others are ready to be written
+ * so we distinguish these blocks by the R5_Wantprexor bit
+ */
+ if (dev->towrite &&
+ (test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ set_bit(R5_Wantprexor, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ clear_bit(R5_UPTODATE, &dev->flags);
+ locked++;
+ }
+ }
+ }
+
+ /* keep the parity disk locked while asynchronous operations
+ * are in flight
+ */
+ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ locked++;
+
+ PRINTK("%s: stripe %llu locked: %d pending: %lx\n",
+ __FUNCTION__, (unsigned long long)sh->sector,
+ locked, sh->ops.pending);
+
+ return locked;
+}
/*
* Each stripe/dev can have one or more bion attached.
@@ -2170,8 +2237,67 @@ static void handle_stripe5(struct stripe_head *sh)
set_bit(STRIPE_HANDLE, &sh->state);
}
- /* now to consider writing and what else, if anything should be read */
- if (to_write) {
+ /* Now we check to see if any write operations have recently
+ * completed
+ */
+
+ /* leave prexor set until postxor is done, allows us to distinguish
+ * a rmw from a rcw during biodrain
+ */
+ if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
+ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
+ clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+
+ for (i=disks; i--;)
+ clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+ }
+
+ /* if only POSTXOR is set then this is an 'expand' postxor */
+ if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
+ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
+ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
+
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+
+ /* All the 'written' buffers and the parity block are ready to be
+ * written back to disk
+ */
+ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ for (i=disks; i--;) {
+ dev = &sh->dev[i];
+ if (test_bit(R5_LOCKED, &dev->flags) &&
+ (i == sh->pd_idx || dev->written)) {
+ PRINTK("Writing block %d\n", i);
+ set_bit(R5_Wantwrite, &dev->flags);
+ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+ sh->ops.count++;
+ if (!test_bit(R5_Insync, &dev->flags)
+ || (i==sh->pd_idx && failed == 0))
+ set_bit(STRIPE_INSYNC, &sh->state);
+ }
+ }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+
+ /* 1/ Now to consider new write requests and what else, if anything should be read
+ * 2/ Check operations clobber the parity block so do not start new writes while
+ * a check is in flight
+ * 3/ Write operations do not stack
+ */
+ if (to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
+ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
int rmw=0, rcw=0;
for (i=disks ; i--;) {
/* would I have to read this buffer for read_modify_write */
@@ -2238,25 +2364,8 @@ static void handle_stripe5(struct stripe_head *sh)
}
/* now if nothing is locked, and if we have enough data, we can start a write request */
if (locked == 0 && (rcw == 0 ||rmw == 0) &&
- !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- PRINTK("Computing parity...\n");
- compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
- /* now every locked buffer is ready to be written */
- for (i=disks; i--;)
- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
- PRINTK("Writing block %d\n", i);
- locked++;
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- if (!test_bit(R5_Insync, &sh->dev[i].flags)
- || (i==sh->pd_idx && failed == 0))
- set_bit(STRIPE_INSYNC, &sh->state);
- }
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
- }
+ !test_bit(STRIPE_BIT_DELAY, &sh->state))
+ locked += handle_write_operations5(sh, rcw == 0, 0);
}
/* maybe we need to check and possibly fix the parity for this stripe
next prev parent reply other threads:[~2007-05-02 6:18 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-02 6:14 [PATCH 00/16] raid acceleration and asynchronous offload api for 2.6.22 Dan Williams
2007-05-02 6:15 ` [PATCH 01/16] dmaengine: add base support for the async_tx api Dan Williams
2007-05-02 6:15 ` [PATCH 02/16] dmaengine: move channel management to the client Dan Williams
2007-05-02 6:16 ` [PATCH 03/16] ARM: Add drivers/dma to arch/arm/Kconfig Dan Williams
2007-05-02 6:16 ` [PATCH 04/16] dmaengine: add the async_tx api Dan Williams
2007-05-02 6:17 ` [PATCH 05/16] md: add raid5_run_ops and support routines Dan Williams
2007-05-02 6:17 ` [PATCH 06/16] md: use raid5_run_ops for stripe cache operations Dan Williams
2007-05-02 6:18 ` Dan Williams [this message]
2007-05-02 6:18 ` [PATCH 08/16] md: move raid5 compute block operations to raid5_run_ops Dan Williams
2007-05-02 6:19 ` [PATCH 09/16] md: move raid5 parity checks " Dan Williams
2007-05-02 6:19 ` [PATCH 10/16] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2007-05-02 6:20 ` [PATCH 11/16] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2007-05-02 6:20 ` [PATCH 12/16] md: move raid5 io requests to raid5_run_ops Dan Williams
2007-05-02 6:21 ` [PATCH 13/16] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-05-02 6:21 ` [PATCH 14/16] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2007-05-02 6:22 ` [PATCH 15/16] iop13xx: Surface the iop13xx adma units to the iop-adma driver Dan Williams
2007-05-02 6:22 ` [PATCH 16/16] iop3xx: Surface the iop3xx DMA and AAU " Dan Williams
2007-05-02 6:55 ` [PATCH 00/16] raid acceleration and asynchronous offload api for 2.6.22 Nick Piggin
2007-05-02 15:45 ` Williams, Dan J
2007-05-02 15:55 ` Justin Piszcz
2007-05-02 16:17 ` Williams, Dan J
2007-05-02 16:19 ` Justin Piszcz
2007-05-02 16:36 ` Dagfinn Ilmari Mannsåker
2007-05-02 16:42 ` Williams, Dan J
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070502061811.7066.16213.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=akpm@linux-foundation.org \
--cc=christopher.leech@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).