From: Dan Williams <dan.j.williams@intel.com>
To: linux-raid@vger.kernel.org, linux-btrfs@vger.kernel.org
Subject: [RFC PATCH 2/2] md/raid456: switch to btrq for multicore operation
Date: Wed, 24 Mar 2010 07:53:20 -0700 [thread overview]
Message-ID: <20100324145320.15371.81326.stgit@dwillia2-linux> (raw)
In-Reply-To: <20100324144904.15371.2317.stgit@dwillia2-linux>
The btrfs workqueue (btrq) is designed for load-balancing CPU-intensive
operations. Reuse it in md/raid456 to distribute stripe processing
across multiple cores.
---
drivers/md/Kconfig | 1 +
drivers/md/raid5.c | 79 ++++++++++++++++++++++++++++++----------------------
drivers/md/raid5.h | 13 +++++----
3 files changed, 54 insertions(+), 39 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 922c36c..09ade02 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -129,6 +129,7 @@ config MD_RAID456
select ASYNC_PQ
select ASYNC_RAID6_RECOV
select BLK_BBU if MD_RAID456_BBU
+ select BTRQ if MULTICORE_RAID456
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 90d7678..4afa625 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1084,7 +1084,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
&sh->ops.zero_sum_result, percpu->spare_page, &submit);
}
-static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL;
@@ -1149,34 +1149,9 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
-#ifdef CONFIG_MULTICORE_RAID456
-static void async_run_ops(void *param, async_cookie_t cookie)
-{
- struct stripe_head *sh = param;
- unsigned long ops_request = sh->ops.request;
-
- clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state);
- wake_up(&sh->ops.wait_for_ops);
-
- __raid_run_ops(sh, ops_request);
- release_stripe(sh);
-}
-
-static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
-{
- /* since handle_stripe can be called outside of raid5d context
- * we need to ensure sh->ops.request is de-staged before another
- * request arrives
- */
- wait_event(sh->ops.wait_for_ops,
- !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state));
- sh->ops.request = ops_request;
- atomic_inc(&sh->count);
- async_schedule(async_run_ops, sh);
-}
-#else
-#define raid_run_ops __raid_run_ops
+#ifdef CONFIG_MULTICORE_RAID456
+static void stripe_work(struct btrq_work *work);
#endif
static int grow_one_stripe(raid5_conf_t *conf)
@@ -1189,9 +1164,10 @@ static int grow_one_stripe(raid5_conf_t *conf)
memset(sh, 0, sizeof(*sh) + (disks-1)*sizeof(struct r5dev));
sh->raid_conf = conf;
spin_lock_init(&sh->lock);
- #ifdef CONFIG_MULTICORE_RAID456
- init_waitqueue_head(&sh->ops.wait_for_ops);
- #endif
+#ifdef CONFIG_MULTICORE_RAID456
+ sh->work.func = stripe_work;
+ sh->work.flags = 0;
+#endif
if (grow_buffers(sh, disks)) {
shrink_buffers(sh, disks);
@@ -1307,9 +1283,10 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
nsh->raid_conf = conf;
spin_lock_init(&nsh->lock);
- #ifdef CONFIG_MULTICORE_RAID456
- init_waitqueue_head(&nsh->ops.wait_for_ops);
- #endif
+#ifdef CONFIG_MULTICORE_RAID456
+ nsh->work.func = stripe_work;
+ nsh->work.flags = 0;
+#endif
list_add(&nsh->lru, &newstripes);
}
@@ -3441,6 +3418,26 @@ static void handle_stripe6(struct stripe_head *sh)
return_io(return_bi);
}
+#ifdef CONFIG_MULTICORE_RAID456
+static void stripe_work(struct btrq_work *work)
+{
+ struct stripe_head *sh = container_of(work, struct stripe_head, work);
+
+ if (sh->raid_conf->level == 6)
+ handle_stripe6(sh);
+ else
+ handle_stripe5(sh);
+ release_stripe(sh);
+}
+
+static void handle_stripe(struct stripe_head *sh)
+{
+ raid5_conf_t *conf = sh->raid_conf;
+
+ atomic_inc(&sh->count);
+ btrq_queue_worker(&conf->workqueue, &sh->work);
+}
+#else
static void handle_stripe(struct stripe_head *sh)
{
if (sh->raid_conf->level == 6)
@@ -3448,6 +3445,7 @@ static void handle_stripe(struct stripe_head *sh)
else
handle_stripe5(sh);
}
+#endif
static void raid5_activate_delayed(raid5_conf_t *conf)
{
@@ -4546,6 +4544,9 @@ static void free_conf(raid5_conf_t *conf)
raid5_free_percpu(conf);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
+#ifdef CONFIG_MULTICORE_RAID456
+ btrq_stop_workers(&conf->workqueue);
+#endif
kfree(conf);
}
@@ -4683,6 +4684,16 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
atomic_set(&conf->preread_active_stripes, 0);
atomic_set(&conf->active_aligned_reads, 0);
conf->bypass_threshold = BYPASS_THRESHOLD;
+#ifdef CONFIG_MULTICORE_RAID456
+ sprintf(conf->queue_name, "%s_%s", mdname(mddev), mddev->pers->name);
+ btrq_init_workers(&conf->workqueue, conf->queue_name,
+ min_t(unsigned long, num_online_cpus() + 2, 8), NULL);
+ if (btrq_start_workers(&conf->workqueue, 1)) {
+ printk(KERN_ERR "raid5: failed to start thread pool for %s\n",
+ mdname(mddev));
+ goto abort;
+ }
+#endif
conf->raid_disks = mddev->raid_disks;
if (mddev->reshape_position == MaxSector)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index dd70835..81c027b 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -3,6 +3,7 @@
#include <linux/raid/xor.h>
#include <linux/dmaengine.h>
+#include <linux/btrqueue.h>
/*
*
@@ -214,6 +215,9 @@ struct stripe_head {
int disks; /* disks in stripe */
enum check_states check_state;
enum reconstruct_states reconstruct_state;
+#ifdef CONFIG_MULTICORE_RAID456
+ struct btrq_work work;
+#endif
/**
* struct stripe_operations
* @target - STRIPE_OP_COMPUTE_BLK target
@@ -224,10 +228,6 @@ struct stripe_head {
struct stripe_operations {
int target, target2;
enum sum_check_flags zero_sum_result;
- #ifdef CONFIG_MULTICORE_RAID456
- unsigned long request;
- wait_queue_head_t wait_for_ops;
- #endif
} ops;
struct r5dev {
struct bio req;
@@ -302,7 +302,6 @@ struct r6_state {
#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
#define STRIPE_BIOFILL_RUN 14
#define STRIPE_COMPUTE_RUN 15
-#define STRIPE_OPS_REQ_PENDING 16
/*
* Operation request flags
@@ -382,6 +381,10 @@ struct raid5_private_data {
int bypass_count; /* bypassed prereads */
int bypass_threshold; /* preread nice */
struct list_head *last_hold; /* detect hold_list promotions */
+#ifdef CONFIG_MULTICORE_RAID456
+ struct btrq_workers workqueue;
+ char queue_name[20];
+#endif
atomic_t reshape_stripes; /* stripes with pending writes for reshape */
/* unfortunately we need two cache names as we temporarily have
next prev parent reply other threads:[~2010-03-24 14:53 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-24 14:53 [RFC PATCH 0/2] more raid456 thread pool experimentation Dan Williams
2010-03-24 14:53 ` [RFC PATCH 1/2] btrq: uplevel the btrfs thread pool for md/raid456 usage Dan Williams
2010-03-24 14:53 ` Dan Williams [this message]
2010-03-24 15:51 ` [RFC PATCH 0/2] more raid456 thread pool experimentation Chris Mason
2010-03-24 18:06 ` Dan Williams
2010-03-24 18:06 ` Dan Williams
2010-03-24 19:31 ` Chris Mason
2010-03-24 19:31 ` Chris Mason
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100324145320.15371.81326.stgit@dwillia2-linux \
--to=dan.j.williams@intel.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.