From: shli@kernel.org
To: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: neilb@suse.de, tj@kernel.org, djbw@fb.com
Subject: [patch 3/3] raid5: only wakeup necessary threads
Date: Tue, 30 Jul 2013 13:52:10 +0800 [thread overview]
Message-ID: <20130730055425.056034691@kernel.org> (raw)
In-Reply-To: 20130730055207.698660010@kernel.org
[-- Attachment #1: raid5-intelligent-wakeup.patch --]
[-- Type: text/plain, Size: 6303 bytes --]
If there are not enough stripes to handle, we'd better not always queue all
available work_structs. If a worker can only handle a few stripes, or even none,
it will hurt request merging and create lock contention.
With this patch, the number of running work_structs depends on the number of
pending stripes. Note that some statistics used in the patch are accessed without
locking protection. This shouldn't matter; we just try our best to avoid queueing
unnecessary work_structs.
Signed-off-by: Shaohua Li <shli@fusionio.com>
---
drivers/md/raid5.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------
drivers/md/raid5.h | 4 ++++
2 files changed, 48 insertions(+), 6 deletions(-)
Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c 2013-07-30 09:44:18.000000000 +0800
+++ linux/drivers/md/raid5.c 2013-07-30 13:03:28.738736366 +0800
@@ -77,6 +77,7 @@ static struct workqueue_struct *raid5_wq
#define BYPASS_THRESHOLD 1
#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
+#define MAX_STRIPE_BATCH 8
static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
{
@@ -209,6 +210,7 @@ static void raid5_wakeup_stripe_thread(s
{
struct r5conf *conf = sh->raid_conf;
struct r5worker_group *group;
+ int thread_cnt;
int i;
if (conf->worker_cnt_per_group == 0) {
@@ -218,8 +220,26 @@ static void raid5_wakeup_stripe_thread(s
group = conf->worker_groups + cpu_to_group(sh->cpu);
- for (i = 0; i < conf->worker_cnt_per_group; i++)
- queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+ group->workers[0].working = 1;
+ /* at least one worker should run to avoid race */
+ queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+ thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+ /* wakeup more worker */
+ for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+ if (group->workers[i].working == 0) {
+ group->workers[i].working = 1;
+ queue_work_on(sh->cpu, raid5_wq,
+ &group->workers[i].work);
+ thread_cnt--;
+ } else if (group->workers[i].working_cnt <=
+ MAX_STRIPE_BATCH / 2)
+ /*
+ * If a worker has no enough stripes handling, assume
+ * it will fetch more stripes soon
+ */
+ thread_cnt--;
+ }
}
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
@@ -248,6 +268,8 @@ static void do_release_stripe(struct r5c
struct r5worker_group *group;
group = conf->worker_groups + cpu_to_group(cpu);
list_add_tail(&sh->lru, &group->handle_list);
+ group->stripes_cnt++;
+ sh->group = group;
}
raid5_wakeup_stripe_thread(sh);
return;
@@ -573,6 +595,10 @@ get_active_stripe(struct r5conf *conf, s
!test_bit(STRIPE_EXPANDING, &sh->state))
BUG();
list_del_init(&sh->lru);
+ if (sh->group) {
+ sh->group->stripes_cnt--;
+ sh->group = NULL;
+ }
}
}
} while (sh == NULL);
@@ -4143,6 +4169,7 @@ static struct stripe_head *__get_priorit
{
struct stripe_head *sh = NULL, *tmp;
struct list_head *handle_list = NULL;
+ struct r5worker_group *wg = NULL;
if (conf->worker_cnt_per_group == 0) {
handle_list = &conf->handle_list;
@@ -4150,6 +4177,7 @@ static struct stripe_head *__get_priorit
handle_list = NULL;
} else if (group != ANY_GROUP) {
handle_list = &conf->worker_groups[group].handle_list;
+ wg = &conf->worker_groups[group];
if (list_empty(handle_list))
handle_list = NULL;
} else {
@@ -4157,6 +4185,7 @@ static struct stripe_head *__get_priorit
/* Should we take action to avoid starvation of latter groups ? */
for (i = 0; i < conf->group_cnt; i++) {
handle_list = &conf->worker_groups[i].handle_list;
+ wg = &conf->worker_groups[i];
if (!list_empty(handle_list))
break;
}
@@ -4205,11 +4234,16 @@ static struct stripe_head *__get_priorit
if (conf->bypass_count < 0)
conf->bypass_count = 0;
}
+ wg = NULL;
}
if (!sh)
return NULL;
+ if (wg) {
+ wg->stripes_cnt--;
+ sh->group = NULL;
+ }
list_del_init(&sh->lru);
atomic_inc(&sh->count);
BUG_ON(atomic_read(&sh->count) != 1);
@@ -4907,8 +4941,8 @@ static int retry_aligned_read(struct r5
return handled;
}
-#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf, int group)
+static int handle_active_stripes(struct r5conf *conf, int group,
+ struct r5worker *worker)
{
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
int i, batch_size = 0;
@@ -4917,6 +4951,9 @@ static int handle_active_stripes(struct
(sh = __get_priority_stripe(conf, group)) != NULL)
batch[batch_size++] = sh;
+ if (worker)
+ worker->working_cnt = batch_size;
+
if (batch_size == 0)
return batch_size;
spin_unlock_irq(&conf->device_lock);
@@ -4951,11 +4988,12 @@ static void raid5_do_work(struct work_st
released = release_stripe_list(conf);
- batch_size = handle_active_stripes(conf, group_id);
+ batch_size = handle_active_stripes(conf, group_id, worker);
if (!batch_size && !released)
break;
handled += batch_size;
}
+ worker->working = 0;
pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock);
@@ -5013,7 +5051,7 @@ static void raid5d(struct md_thread *thr
handled++;
}
- batch_size = handle_active_stripes(conf, ANY_GROUP);
+ batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
if (!batch_size && !released)
break;
handled += batch_size;
Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h 2013-07-30 09:14:22.000000000 +0800
+++ linux/drivers/md/raid5.h 2013-07-30 09:46:22.777233803 +0800
@@ -213,6 +213,7 @@ struct stripe_head {
enum reconstruct_states reconstruct_state;
spinlock_t stripe_lock;
int cpu;
+ struct r5worker_group *group;
/**
* struct stripe_operations
* @target - STRIPE_OP_COMPUTE_BLK target
@@ -369,12 +370,15 @@ struct disk_info {
struct r5worker {
struct work_struct work;
struct r5worker_group *group;
+ int working:1;
+ int working_cnt:8;
};
struct r5worker_group {
struct list_head handle_list;
struct r5conf *conf;
struct r5worker *workers;
+ int stripes_cnt;
};
struct r5conf {
next prev parent reply other threads:[~2013-07-30 5:52 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-30 5:52 [patch 0/3] raid5: make stripe handling multi-threading shli
2013-07-30 5:52 ` [patch 1/3] raid5: offload stripe handle to workqueue shli
2013-07-30 11:46 ` Tejun Heo
2013-07-30 12:53 ` Tejun Heo
2013-07-30 13:07 ` Shaohua Li
2013-07-30 13:57 ` Tejun Heo
2013-07-31 1:24 ` Shaohua Li
2013-07-31 10:33 ` Tejun Heo
2013-08-01 2:01 ` Shaohua Li
2013-08-01 12:15 ` Tejun Heo
2013-07-30 5:52 ` [patch 2/3] raid5: sysfs entry to control worker thread number shli
2013-07-30 5:52 ` shli [this message]
2013-07-30 12:46 ` [patch 3/3] raid5: only wakeup necessary threads Tejun Heo
2013-07-30 13:24 ` Shaohua Li
2013-07-30 14:01 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130730055425.056034691@kernel.org \
--to=shli@kernel.org \
--cc=djbw@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).