All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, jeff@garzik.org, christopher.leech@intel.com,
	akpm@osdl.org
Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, olof@lixom.net
Subject: [PATCH 05/12] md: workqueue for raid5 operations
Date: Thu, 30 Nov 2006 13:10:25 -0700	[thread overview]
Message-ID: <20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <e9c3a7c20611301155p4069c642j276d7705b0f81447@mail.gmail.com>

From: Dan Williams <dan.j.williams@intel.com>

Each raid5 device gets its own queue, and each stripe has its own
work_struct.  The goal is to have a free running raid5d thread, i.e. reduce
the time the stripe lock is held by removing bulk memory operations, and
removing the sleeping path in generic_make_request.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/md/raid5.c         |   37 +++++++++++++++++++++++++++++++++----
 include/linux/raid/raid5.h |    6 ++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 232f525..c2312d1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -126,6 +126,7 @@ static void __release_stripe(raid5_conf_
 			}
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
+			BUG_ON(sh->ops.pending);
 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 				atomic_dec(&conf->preread_active_stripes);
 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -324,6 +325,15 @@ static struct stripe_head *get_active_st
 	return sh;
 }
 
+static inline void issue_raid_ops(struct stripe_head *sh)
+{
+	raid5_conf_t *conf = sh->raid_conf;
+
+	atomic_inc(&sh->count);
+	conf->workqueue_stripes++;
+	queue_work(sh->raid_conf->workqueue, &sh->ops.work);
+}
+
 static int
 raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error);
 static int
@@ -868,6 +878,10 @@ static void raid5_run_ops(void *stripe_h
 	} else if (sh->ops.count < 0)
 		BUG();
 
+	/* we kick off work to the engines in batches */
+	if (--(conf->workqueue_stripes) == 0)
+		async_tx_issue_pending_all();
+
 	spin_unlock(&sh->lock);
 
 	set_bit(STRIPE_HANDLE, &sh->state);
@@ -883,6 +897,7 @@ static int grow_one_stripe(raid5_conf_t 
 	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
 	sh->raid_conf = conf;
 	spin_lock_init(&sh->lock);
+	INIT_WORK(&sh->ops.work, raid5_run_ops, sh);
 
 	if (grow_buffers(sh, conf->raid_disks)) {
 		shrink_buffers(sh, conf->raid_disks);
@@ -1923,7 +1938,6 @@ static int stripe_to_pdidx(sector_t stri
  *    schedule a write of some buffers
  *    return confirmation of parity correctness
  *
- * Parity calculations are done inside the stripe lock
  * buffers are taken off read_list or write_list, and bh_cache buffers
  * get BH_Lock set before the stripe lock is released.
  *
@@ -1942,9 +1956,9 @@ static void handle_stripe5(struct stripe
 	int failed_num=0;
 	struct r5dev *dev;
 
-	PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
-		(unsigned long long)sh->sector, atomic_read(&sh->count),
-		sh->pd_idx);
+	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d ops=%lx:%lx:%lx\n",
+	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
+	       sh->pd_idx, sh->ops.pending, sh->ops.ack, sh->ops.complete);
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2409,6 +2423,10 @@ #endif
 			}
 	}
 
+	if (sh->ops.count && !test_and_set_bit(STRIPE_OPSQUEUE_ACTIVE, &sh->state)) {
+		issue_raid_ops(sh);
+	}
+
 	spin_unlock(&sh->lock);
 
 	while ((bi=return_bi)) {
@@ -3717,6 +3735,13 @@ static int run(mddev_t *mddev)
 		if (!conf->spare_page)
 			goto abort;
 	}
+
+	sprintf(conf->workqueue_name, "%s_raid5_ops",
+		mddev->gendisk->disk_name);
+
+	if ((conf->workqueue = create_workqueue(conf->workqueue_name)) == NULL)
+		goto abort;
+
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
@@ -3726,6 +3751,7 @@ static int run(mddev_t *mddev)
 	INIT_LIST_HEAD(&conf->inactive_list);
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
+	conf->workqueue_stripes = 0;
 
 	PRINTK("raid5: run(%s) called.\n", mdname(mddev));
 
@@ -3879,6 +3905,8 @@ abort:
 		safe_put_page(conf->spare_page);
 		kfree(conf->disks);
 		kfree(conf->stripe_hashtbl);
+		if (conf->workqueue)
+			destroy_workqueue(conf->workqueue);
 		kfree(conf);
 	}
 	mddev->private = NULL;
@@ -3899,6 +3927,7 @@ static int stop(mddev_t *mddev)
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 	kfree(conf->disks);
+	destroy_workqueue(conf->workqueue);
 	kfree(conf);
 	mddev->private = NULL;
 	return 0;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index a1c3f85..c77154a 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -3,6 +3,7 @@ #define _RAID5_H
 
 #include <linux/raid/md.h>
 #include <linux/raid/xor.h>
+#include <linux/workqueue.h>
 
 /*
  *
@@ -170,6 +171,7 @@ struct stripe_head {
 		unsigned long	   pending;  /* pending operations (set for request->issue->complete) */
 		unsigned long	   ack;	     /* submitted operations (set for issue->complete */
 		unsigned long	   complete; /* completed operations flags (set for complete) */
+		struct work_struct work;     /* move ops from request to issue to complete */
 		int		   target;   /* STRIPE_OP_COMPUTE_BLK target */
 		int		   count;    /* workqueue runs when this is non-zero */
 		u32		   zero_sum_result;
@@ -289,11 +291,15 @@ struct raid5_private_data {
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 
 	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */
+
+	struct workqueue_struct *workqueue;
+	int			workqueue_stripes; /* stripes awaiting raid5_run_ops service */
 	/* unfortunately we need two cache names as we temporarily have
 	 * two caches.
 	 */
 	int			active_name;
 	char			cache_name[2][20];
+	char			workqueue_name[20];
 	kmem_cache_t		*slab_cache; /* for allocating stripes */
 
 	int			seq_flush, seq_write;

  parent reply	other threads:[~2006-11-30 20:10 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-11-30 19:55 [PATCH 00/12] md raid acceleration and the async_tx api Dan Williams
2006-11-30 20:10 ` [PATCH 01/12] dmaengine: add base support for " Dan Williams
2006-11-30 20:10 ` [PATCH 02/12] dmaengine: add " Dan Williams
2006-12-01  1:19   ` Dan Williams
2006-11-30 20:10 ` [PATCH 03/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2006-11-30 20:10 ` [PATCH 04/12] md: add raid5_run_ops and support routines Dan Williams
2006-11-30 20:10 ` Dan Williams [this message]
2006-11-30 20:10 ` [PATCH 06/12] md: move write operations to raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 07/12] md: move raid5 compute block " Dan Williams
2006-11-30 20:10 ` [PATCH 08/12] md: move raid5 parity checks " Dan Williams
2006-11-30 20:10 ` [PATCH 09/12] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 10/12] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2006-11-30 20:10 ` [PATCH 11/12] md: raid5 io requests to raid5_run_ops Dan Williams
2006-11-30 20:11 ` [PATCH 12/12] md: remove raid5 compute_block and compute_parity5 Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@osdl.org \
    --cc=christopher.leech@intel.com \
    --cc=jeff@garzik.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=olof@lixom.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.