From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, jeff@garzik.org, christopher.leech@intel.com,
akpm@osdl.org
Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, olof@lixom.net
Subject: [PATCH 05/12] md: workqueue for raid5 operations
Date: Thu, 30 Nov 2006 13:10:25 -0700 [thread overview]
Message-ID: <20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <e9c3a7c20611301155p4069c642j276d7705b0f81447@mail.gmail.com>
From: Dan Williams <dan.j.williams@intel.com>
Each raid5 device gets its own queue, and each stripe has its own
work_struct. The goal is to have a free running raid5d thread, i.e. reduce
the time the stripe lock is held by removing bulk memory operations, and
removing the sleeping path in generic_make_request.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/md/raid5.c | 37 +++++++++++++++++++++++++++++++++----
include/linux/raid/raid5.h | 6 ++++++
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 232f525..c2312d1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -126,6 +126,7 @@ static void __release_stripe(raid5_conf_
}
md_wakeup_thread(conf->mddev->thread);
} else {
+ BUG_ON(sh->ops.pending);
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -324,6 +325,15 @@ static struct stripe_head *get_active_st
return sh;
}
+static inline void issue_raid_ops(struct stripe_head *sh)
+{
+ raid5_conf_t *conf = sh->raid_conf;
+
+ atomic_inc(&sh->count);
+ conf->workqueue_stripes++;
+ queue_work(sh->raid_conf->workqueue, &sh->ops.work);
+}
+
static int
raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error);
static int
@@ -868,6 +878,10 @@ static void raid5_run_ops(void *stripe_h
} else if (sh->ops.count < 0)
BUG();
+ /* we kick off work to the engines in batches */
+ if (--(conf->workqueue_stripes) == 0)
+ async_tx_issue_pending_all();
+
spin_unlock(&sh->lock);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -883,6 +897,7 @@ static int grow_one_stripe(raid5_conf_t
memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
sh->raid_conf = conf;
spin_lock_init(&sh->lock);
+ INIT_WORK(&sh->ops.work, raid5_run_ops, sh);
if (grow_buffers(sh, conf->raid_disks)) {
shrink_buffers(sh, conf->raid_disks);
@@ -1923,7 +1938,6 @@ static int stripe_to_pdidx(sector_t stri
* schedule a write of some buffers
* return confirmation of parity correctness
*
- * Parity calculations are done inside the stripe lock
* buffers are taken off read_list or write_list, and bh_cache buffers
* get BH_Lock set before the stripe lock is released.
*
@@ -1942,9 +1956,9 @@ static void handle_stripe5(struct stripe
int failed_num=0;
struct r5dev *dev;
- PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
- (unsigned long long)sh->sector, atomic_read(&sh->count),
- sh->pd_idx);
+ PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d ops=%lx:%lx:%lx\n",
+ (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
+ sh->pd_idx, sh->ops.pending, sh->ops.ack, sh->ops.complete);
spin_lock(&sh->lock);
clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2409,6 +2423,10 @@ #endif
}
}
+ if (sh->ops.count && !test_and_set_bit(STRIPE_OPSQUEUE_ACTIVE, &sh->state)) {
+ issue_raid_ops(sh);
+ }
+
spin_unlock(&sh->lock);
while ((bi=return_bi)) {
@@ -3717,6 +3735,13 @@ static int run(mddev_t *mddev)
if (!conf->spare_page)
goto abort;
}
+
+ sprintf(conf->workqueue_name, "%s_raid5_ops",
+ mddev->gendisk->disk_name);
+
+ if ((conf->workqueue = create_workqueue(conf->workqueue_name)) == NULL)
+ goto abort;
+
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
@@ -3726,6 +3751,7 @@ static int run(mddev_t *mddev)
INIT_LIST_HEAD(&conf->inactive_list);
atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0);
+ conf->workqueue_stripes = 0;
PRINTK("raid5: run(%s) called.\n", mdname(mddev));
@@ -3879,6 +3905,8 @@ abort:
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
+ if (conf->workqueue)
+ destroy_workqueue(conf->workqueue);
kfree(conf);
}
mddev->private = NULL;
@@ -3899,6 +3927,7 @@ static int stop(mddev_t *mddev)
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
kfree(conf->disks);
+ destroy_workqueue(conf->workqueue);
kfree(conf);
mddev->private = NULL;
return 0;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index a1c3f85..c77154a 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -3,6 +3,7 @@ #define _RAID5_H
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
+#include <linux/workqueue.h>
/*
*
@@ -170,6 +171,7 @@ struct stripe_head {
unsigned long pending; /* pending operations (set for request->issue->complete) */
unsigned long ack; /* submitted operations (set for issue->complete */
unsigned long complete; /* completed operations flags (set for complete) */
+ struct work_struct work; /* move ops from request to issue to complete */
int target; /* STRIPE_OP_COMPUTE_BLK target */
int count; /* workqueue runs when this is non-zero */
u32 zero_sum_result;
@@ -289,11 +291,15 @@ struct raid5_private_data {
atomic_t preread_active_stripes; /* stripes with scheduled io */
atomic_t reshape_stripes; /* stripes with pending writes for reshape */
+
+ struct workqueue_struct *workqueue;
+ int workqueue_stripes; /* stripes awaiting raid5_run_ops service */
/* unfortunately we need two cache names as we temporarily have
* two caches.
*/
int active_name;
char cache_name[2][20];
+ char workqueue_name[20];
kmem_cache_t *slab_cache; /* for allocating stripes */
int seq_flush, seq_write;
next prev parent reply other threads:[~2006-11-30 20:10 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-30 19:55 [PATCH 00/12] md raid acceleration and the async_tx api Dan Williams
2006-11-30 20:10 ` [PATCH 01/12] dmaengine: add base support for " Dan Williams
2006-11-30 20:10 ` [PATCH 02/12] dmaengine: add " Dan Williams
2006-12-01 1:19 ` Dan Williams
2006-11-30 20:10 ` [PATCH 03/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines Dan Williams
2006-11-30 20:10 ` [PATCH 04/12] md: add raid5_run_ops and support routines Dan Williams
2006-11-30 20:10 ` Dan Williams [this message]
2006-11-30 20:10 ` [PATCH 06/12] md: move write operations to raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 07/12] md: move raid5 compute block " Dan Williams
2006-11-30 20:10 ` [PATCH 08/12] md: move raid5 parity checks " Dan Williams
2006-11-30 20:10 ` [PATCH 09/12] md: satisfy raid5 read requests via raid5_run_ops Dan Williams
2006-11-30 20:10 ` [PATCH 10/12] md: use async_tx and raid5_run_ops for raid5 expansion operations Dan Williams
2006-11-30 20:10 ` [PATCH 11/12] md: raid5 io requests to raid5_run_ops Dan Williams
2006-11-30 20:11 ` [PATCH 12/12] md: remove raid5 compute_block and compute_parity5 Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061130201025.21313.69191.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=akpm@osdl.org \
--cc=christopher.leech@intel.com \
--cc=jeff@garzik.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
--cc=olof@lixom.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.