linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, jeff@garzik.org, christopher.leech@intel.com,
	akpm@osdl.org
Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, olof@lixom.net
Subject: [PATCH 05/12] md: workqueue for raid5 operations
Date: Thu, 30 Nov 2006 13:10:25 -0700	[thread overview]
Message-ID: <20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <e9c3a7c20611301155p4069c642j276d7705b0f81447@mail.gmail.com>

From: Dan Williams <dan.j.williams@intel.com>

Each raid5 device gets its own workqueue, and each stripe has its own
work_struct.  The goal is to have a free-running raid5d thread, i.e. to
reduce the time the stripe lock is held by removing bulk memory operations
and removing the sleeping path in generic_make_request.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c         |   37 +++++++++++++++++++++++++++++++++----
 include/linux/raid/raid5.h |    6 ++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 232f525..c2312d1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -126,6 +126,7 @@ static void __release_stripe(raid5_conf_
 			}
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
+			BUG_ON(sh->ops.pending);
 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 				atomic_dec(&conf->preread_active_stripes);
 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -324,6 +325,15 @@ static struct stripe_head *get_active_st
 	return sh;
 }
 
+static inline void issue_raid_ops(struct stripe_head *sh)
+{
+	raid5_conf_t *conf = sh->raid_conf;
+
+	atomic_inc(&sh->count);
+	conf->workqueue_stripes++;
+	queue_work(sh->raid_conf->workqueue, &sh->ops.work);
+}
+
 static int
 raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error);
 static int
@@ -868,6 +878,10 @@ static void raid5_run_ops(void *stripe_h
 	} else if (sh->ops.count < 0)
 		BUG();
 
+	/* we kick off work to the engines in batches */
+	if (--(conf->workqueue_stripes) == 0)
+		async_tx_issue_pending_all();
+
 	spin_unlock(&sh->lock);
 
 	set_bit(STRIPE_HANDLE, &sh->state);
@@ -883,6 +897,7 @@ static int grow_one_stripe(raid5_conf_t 
 	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
 	sh->raid_conf = conf;
 	spin_lock_init(&sh->lock);
+	INIT_WORK(&sh->ops.work, raid5_run_ops, sh);
 
 	if (grow_buffers(sh, conf->raid_disks)) {
 		shrink_buffers(sh, conf->raid_disks);
@@ -1923,7 +1938,6 @@ static int stripe_to_pdidx(sector_t stri
  *    schedule a write of some buffers
  *    return confirmation of parity correctness
  *
- * Parity calculations are done inside the stripe lock
  * buffers are taken off read_list or write_list, and bh_cache buffers
  * get BH_Lock set before the stripe lock is released.
  *
@@ -1942,9 +1956,9 @@ static void handle_stripe5(struct stripe
 	int failed_num=0;
 	struct r5dev *dev;
 
-	PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
-		(unsigned long long)sh->sector, atomic_read(&sh->count),
-		sh->pd_idx);
+	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d ops=%lx:%lx:%lx\n",
+	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
+	       sh->pd_idx, sh->ops.pending, sh->ops.ack, sh->ops.complete);
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2409,6 +2423,10 @@ #endif
 			}
 	}
 
+	if (sh->ops.count && !test_and_set_bit(STRIPE_OPSQUEUE_ACTIVE, &sh->state)) {
+		issue_raid_ops(sh);
+	}
+
 	spin_unlock(&sh->lock);
 
 	while ((bi=return_bi)) {
@@ -3717,6 +3735,13 @@ static int run(mddev_t *mddev)
 		if (!conf->spare_page)
 			goto abort;
 	}
+
+	sprintf(conf->workqueue_name, "%s_raid5_ops",
+		mddev->gendisk->disk_name);
+
+	if ((conf->workqueue = create_workqueue(conf->workqueue_name)) == NULL)
+		goto abort;
+
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
@@ -3726,6 +3751,7 @@ static int run(mddev_t *mddev)
 	INIT_LIST_HEAD(&conf->inactive_list);
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
+	conf->workqueue_stripes = 0;
 
 	PRINTK("raid5: run(%s) called.\n", mdname(mddev));
 
@@ -3879,6 +3905,8 @@ abort:
 		safe_put_page(conf->spare_page);
 		kfree(conf->disks);
 		kfree(conf->stripe_hashtbl);
+		if (conf->workqueue)
+			destroy_workqueue(conf->workqueue);
 		kfree(conf);
 	}
 	mddev->private = NULL;
@@ -3899,6 +3927,7 @@ static int stop(mddev_t *mddev)
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 	kfree(conf->disks);
+	destroy_workqueue(conf->workqueue);
 	kfree(conf);
 	mddev->private = NULL;
 	return 0;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index a1c3f85..c77154a 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -3,6 +3,7 @@ #define _RAID5_H
 
 #include <linux/raid/md.h>
 #include <linux/raid/xor.h>
+#include <linux/workqueue.h>
 
 /*
  *
@@ -170,6 +171,7 @@ struct stripe_head {
 		unsigned long	   pending;  /* pending operations (set for request->issue->complete) */
 		unsigned long	   ack;	     /* submitted operations (set for issue->complete */
 		unsigned long	   complete; /* completed operations flags (set for complete) */
+		struct work_struct work;     /* move ops from request to issue to complete */
 		int		   target;   /* STRIPE_OP_COMPUTE_BLK target */
 		int		   count;    /* workqueue runs when this is non-zero */
 		u32		   zero_sum_result;
@@ -289,11 +291,15 @@ struct raid5_private_data {
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 
 	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */
+
+	struct workqueue_struct *workqueue;
+	int			workqueue_stripes; /* stripes awaiting raid5_run_ops service */
 	/* unfortunately we need two cache names as we temporarily have
 	 * two caches.
 	 */
 	int			active_name;
 	char			cache_name[2][20];
+	char			workqueue_name[20];
 	kmem_cache_t		*slab_cache; /* for allocating stripes */
 
 	int			seq_flush, seq_write;

  parent reply	other threads:[~2006-11-30 20:10 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-11-30 19:55 [PATCH 00/12] md raid acceleration and the async_tx api Dan Williams
2006-11-30 20:10 ` [PATCH 01/12] dmaengine: add base support for " Dan Williams
2006-11-30 20:10 ` [PATCH 02/12] dmaengine: add " Dan Williams
2006-12-01  1:19   ` Dan Williams
2006-11-30 20:10 ` [PATCH 03/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2006-11-30 20:10 ` [PATCH 04/12] md: add raid5_run_ops and support routines Dan Williams
2006-11-30 20:10 ` Dan Williams [this message]
2006-11-30 20:10 ` [PATCH 06/12] md: move write operations to raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 07/12] md: move raid5 compute block " Dan Williams
2006-11-30 20:10 ` [PATCH 08/12] md: move raid5 parity checks " Dan Williams
2006-11-30 20:10 ` [PATCH 09/12] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 10/12] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2006-11-30 20:10 ` [PATCH 11/12] md: raid5 io requests to raid5_run_ops Dan Williams
2006-11-30 20:11 ` [PATCH 12/12] md: remove raid5 compute_block and compute_parity5 Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@osdl.org \
    --cc=christopher.leech@intel.com \
    --cc=jeff@garzik.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=olof@lixom.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).