[md-accel PATCH 08/19] md: common infrastructure for running operations with raid5_run_ops

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Dan Williams <dan.j.williams@intel.com>
To: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org
Cc: neilb@suse.de, akpm@linux-foundation.org, davem@davemloft.net,
	christopher.leech@intel.com, shannon.nelson@intel.com,
	herbert@gondor.apana.org.au, jeff@garzik.org
Subject: [md-accel PATCH 08/19] md: common infrastructure for running operations with raid5_run_ops
Date: Tue, 26 Jun 2007 18:51:15 -0700	[thread overview]
Message-ID: <20070627015115.18962.76642.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20070627014823.18962.96398.stgit@dwillia2-linux.ch.intel.com>

All the handle_stripe operations that are to be transitioned to use
raid5_run_ops need a method to coherently gather work under the stripe-lock
and hand that work off to raid5_run_ops.  The 'get_stripe_work' routine
runs under the lock to read all the bits in sh->ops.pending that do not
have the corresponding bit set in sh->ops.ack.  This modified 'pending'
bitmap is then passed to raid5_run_ops for processing.

The transition from 'ack' to 'completion' does not need similar protection
as the existing release_stripe infrastructure will guarantee that
handle_stripe will run again after a completion bit is set, and
handle_stripe can tolerate a sh->ops.complete bit being set while the lock
is held.

A call to async_tx_issue_pending_all() is added to raid5d to kick the
offload engines once all pending stripe-operation work has been submitted.
This enables batching of the submission and completion of operations.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
---

 drivers/md/raid5.c |   67 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 34fcda0..7c688f6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -124,6 +124,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 			}
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
+			BUG_ON(sh->ops.pending);
 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 				atomic_dec(&conf->preread_active_stripes);
 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -225,7 +226,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 
 	BUG_ON(atomic_read(&sh->count) != 0);
 	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
-	
+	BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
+
 	CHECK_DEVLOCK();
 	pr_debug("init_stripe called, stripe %llu\n",
 		(unsigned long long)sh->sector);
@@ -241,11 +243,11 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 	for (i = sh->disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 
-		if (dev->toread || dev->towrite || dev->written ||
+		if (dev->toread || dev->read || dev->towrite || dev->written ||
 		    test_bit(R5_LOCKED, &dev->flags)) {
-			printk("sector=%llx i=%d %p %p %p %d\n",
+			printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
 			       (unsigned long long)sh->sector, i, dev->toread,
-			       dev->towrite, dev->written,
+			       dev->read, dev->towrite, dev->written,
 			       test_bit(R5_LOCKED, &dev->flags));
 			BUG();
 		}
@@ -325,6 +327,44 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 	return sh;
 }
 
+/* test_and_ack_op(): dequeue op only once; uses sh and ack from the caller */
+#define test_and_ack_op(op, pend) \
+do {							\
+	if (test_bit(op, &sh->ops.pending) &&		\
+		!test_bit(op, &sh->ops.complete)) {	\
+		if (test_and_set_bit(op, &sh->ops.ack)) \
+			clear_bit(op, &pend);		\
+		else					\
+			ack++;				\
+	} else						\
+		clear_bit(op, &pend);			\
+} while (0)
+
+/* Return the sh->ops.pending snapshot minus ops already acked or complete;
+ * ack the rest and drop sh->ops.count per op handed off (STRIPE_OP_IO is
+ * cleared from sh->ops.pending, not acked, and stays in the result). */
+static unsigned long get_stripe_work(struct stripe_head *sh)
+{
+	unsigned long pending;
+	int ack = 0;
+
+	pending = sh->ops.pending;
+
+	test_and_ack_op(STRIPE_OP_BIOFILL, pending);
+	test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
+	test_and_ack_op(STRIPE_OP_PREXOR, pending);
+	test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
+	test_and_ack_op(STRIPE_OP_POSTXOR, pending);
+	test_and_ack_op(STRIPE_OP_CHECK, pending);
+	if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending))
+		ack++;
+
+	sh->ops.count -= ack;
+	BUG_ON(sh->ops.count < 0);
+
+	return pending;
+}
+
 static int
 raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
 static int
@@ -2487,7 +2527,6 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
  *    schedule a write of some buffers
  *    return confirmation of parity correctness
  *
- * Parity calculations are done inside the stripe lock
  * buffers are taken off read_list or write_list, and bh_cache buffers
  * get BH_Lock set before the stripe lock is released.
  *
@@ -2500,11 +2539,13 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct bio *return_bi = NULL, *bi;
 	struct stripe_head_state s;
 	struct r5dev *dev;
+	unsigned long pending = 0;
 
 	memset(&s, 0, sizeof(s));
-	pr_debug("handling stripe %llu, cnt=%d, pd_idx=%d\n",
-		(unsigned long long)sh->sector, atomic_read(&sh->count),
-		sh->pd_idx);
+	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
+		"ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
+		atomic_read(&sh->count), sh->pd_idx,
+		sh->ops.pending, sh->ops.ack, sh->ops.complete);
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2667,8 +2708,14 @@ static void handle_stripe5(struct stripe_head *sh)
 	if (s.expanding && s.locked == 0)
 		handle_stripe_expansion(conf, sh, NULL);
 
+	if (sh->ops.count)
+		pending = get_stripe_work(sh);
+
 	spin_unlock(&sh->lock);
 
+	if (pending)
+		raid5_run_ops(sh, pending);
+
 	while ((bi=return_bi)) {
 		int bytes = bi->bi_size;
 
@@ -3808,8 +3855,10 @@ static void raid5d (mddev_t *mddev)
 			handled++;
 		}
 
-		if (list_empty(&conf->handle_list))
+		if (list_empty(&conf->handle_list)) {
+			async_tx_issue_pending_all();
 			break;
+		}
 
 		first = conf->handle_list.next;
 		sh = list_entry(first, struct stripe_head, lru);

next prev parent reply	other threads:[~2007-06-27  1:51 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-06-27  1:50 [md-accel PATCH 00/19] md raid acceleration and the async_tx api Dan Williams
2007-06-27  1:50 ` [md-accel PATCH 01/19] dmaengine: refactor dmaengine around dma_async_tx_descriptor Dan Williams
2007-06-27  1:50 ` [md-accel PATCH 02/19] dmaengine: make clients responsible for managing channels Dan Williams
2007-06-27  1:50 ` [md-accel PATCH 03/19] xor: make 'xor_blocks' a library routine for use with async_tx Dan Williams
2007-06-27  6:39   ` Satyam Sharma
2007-06-27 16:13     ` Dan Williams
2007-06-27 16:22       ` Herbert Xu
2007-06-27  1:50 ` [md-accel PATCH 04/19] async_tx: add the async_tx api Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 05/19] raid5: refactor handle_stripe5 and handle_stripe6 (v2) Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 06/19] raid5: replace custom debug PRINTKs with standard pr_debug Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 07/19] md: raid5_run_ops - run stripe operations outside sh->lock Dan Williams
2007-06-27  1:51 ` Dan Williams [this message]
2007-06-27  1:51 ` [md-accel PATCH 09/19] md: handle_stripe5 - add request/completion logic for async write ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 10/19] md: handle_stripe5 - add request/completion logic for async compute ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 11/19] md: handle_stripe5 - add request/completion logic for async check ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 12/19] md: handle_stripe5 - add request/completion logic for async read ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 13/19] md: handle_stripe5 - add request/completion logic for async expand ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 14/19] md: handle_stripe5 - request io processing in raid5_run_ops Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 15/19] md: remove raid5 compute_block and compute_parity5 Dan Williams
2007-06-27  1:51 ` [md-accel PATCH 16/19] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2007-08-27 13:11   ` saeed bishara
2007-08-27 13:14     ` saeed bishara
2007-08-27 19:31       ` Williams, Dan J
2007-08-30 18:43         ` saeed bishara
2007-08-30 20:41           ` Dan Williams
2007-06-27  1:52 ` [md-accel PATCH 17/19] iop13xx: surface the iop13xx adma units to the iop-adma driver Dan Williams
2007-06-27  1:52 ` [md-accel PATCH 18/19] iop3xx: surface the iop3xx DMA and AAU " Dan Williams
2007-06-27  1:52 ` [md-accel PATCH 19/19] ARM: Add drivers/dma to arch/arm/Kconfig Dan Williams
2007-06-27  3:49 ` [md-accel PATCH 00/19] md raid acceleration and the async_tx api Mr. James W. Laferriere
2007-06-27  4:02   ` Dan Williams
2007-06-27 16:45 ` Bill Davidsen
2007-06-27 17:09   ` Williams, Dan J

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:34fcda0 dfblob:7c688f6 )
 OR (
bs:"[md-accel PATCH 08/19] md: common infrastructure for running operations with raid5_run_ops" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070627015115.18962.76642.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=christopher.leech@intel.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=jeff@garzik.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=shannon.nelson@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).