All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaohua Li <shli@kernel.org>
To: NeilBrown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org, tj@kernel.org, dan.j.williams@gmail.com
Subject: Re: [patch v3 5/5] raid5: only wakeup necessary threads
Date: Thu, 29 Aug 2013 15:40:32 +0800	[thread overview]
Message-ID: <20130829074032.GA24012@kernel.org> (raw)
In-Reply-To: <20130828141304.5e6df26e@notabene.brown>

On Wed, Aug 28, 2013 at 02:13:04PM +1000, NeilBrown wrote:
> On Tue, 27 Aug 2013 17:50:43 +0800 Shaohua Li <shli@kernel.org> wrote:
> 
> 
> > @@ -229,8 +233,26 @@ static void raid5_wakeup_stripe_thread(s
> >  
> >  	group = conf->worker_groups + cpu_to_group(sh->cpu);
> >  
> > -	for (i = 0; i < conf->worker_cnt_per_group; i++)
> > -		queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
> > +	group->workers[0].working = true;
> > +	/* at least one worker should run to avoid race */
> > +	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
> > +
> > +	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
> > +	/* wakeup more workers */
> > +	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
> > +		if (group->workers[i].working == false) {
> > +			group->workers[i].working = true;
> > +			queue_work_on(sh->cpu, raid5_wq,
> > +				      &group->workers[i].work);
> > +			thread_cnt--;
> > +		} else if (group->workers[i].working_cnt <=
> > +			   MAX_STRIPE_BATCH / 2)
> > +			/*
> > +			 * If a worker has no enough stripes handling, assume
> > +			 * it will fetch more stripes soon.
> > +			 */
> > +			thread_cnt--;
> > +	}
> >  }
> 
> I don't really understand this  "working_cnt <= MAX_STRIPE_BATCH / 2"
> heuristic.  It is at best a very coarse estimate of how long until the worker
> will get some more stripes to work on.
> I think I would simply not count any thread that is already working (except
> the first, which is always counted whether it is working or not)
> Do you see some particular gain from the counting?

Ok, looks no difference, I removed it.
 
> > -#define MAX_STRIPE_BATCH 8
> > -static int handle_active_stripes(struct r5conf *conf, int group)
> > +static int handle_active_stripes(struct r5conf *conf, int group,
> > +		struct r5worker *worker)
> >  {
> >  	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
> >  	int i, batch_size = 0;
> > @@ -4921,6 +4955,9 @@ static int handle_active_stripes(struct
> >  			(sh = __get_priority_stripe(conf, group)) != NULL)
> >  		batch[batch_size++] = sh;
> >  
> > +	if (worker)
> > +		worker->working_cnt = batch_size;
> > +
> >  	if (batch_size == 0)
> >  		return batch_size;
> 
> I think this could possibly return with ->working still 'true'.
> I think it is safest to clear it on every exit from the function

Ok, this one doesn't matter too, I fixed it.
 
Subject:raid5: only wakeup necessary threads

If there are no enough stripes to handle, we'd better not always queue all
available work_structs. If one worker can only handle small or even none
stripes, it will impact request merge and create lock contention.

With this patch, the number of work_struct running will depend on pending
stripes number. Note: some statistics info used in the patch are accessed without
locking protection. This should doesn't matter, we just try best to avoid queue
unnecessary work_struct.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 drivers/md/raid5.c |   44 ++++++++++++++++++++++++++++++++++++++------
 drivers/md/raid5.h |    4 ++++
 2 files changed, 42 insertions(+), 6 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2013-08-28 16:49:51.744491417 +0800
+++ linux/drivers/md/raid5.c	2013-08-29 15:35:57.420380732 +0800
@@ -77,6 +77,7 @@ static struct workqueue_struct *raid5_wq
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
+#define MAX_STRIPE_BATCH	8
 
 static inline struct hlist_head *stripe_hash_list(struct r5conf *conf,
 						sector_t sect)
@@ -267,6 +268,7 @@ static void raid5_wakeup_stripe_thread(s
 {
 	struct r5conf *conf = sh->raid_conf;
 	struct r5worker_group *group;
+	int thread_cnt;
 	int i, cpu = sh->cpu;
 
 	if (!cpu_online(cpu)) {
@@ -278,6 +280,8 @@ static void raid5_wakeup_stripe_thread(s
 		struct r5worker_group *group;
 		group = conf->worker_groups + cpu_to_group(cpu);
 		list_add_tail(&sh->lru, &group->handle_list);
+		group->stripes_cnt++;
+		sh->group = group;
 	}
 
 	if (conf->worker_cnt_per_group == 0) {
@@ -287,8 +291,20 @@ static void raid5_wakeup_stripe_thread(s
 
 	group = conf->worker_groups + cpu_to_group(sh->cpu);
 
-	for (i = 0; i < conf->worker_cnt_per_group; i++)
-		queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+	group->workers[0].working = true;
+	/* at least one worker should run to avoid race */
+	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+	/* wakeup more workers */
+	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+		if (group->workers[i].working == false) {
+			group->workers[i].working = true;
+			queue_work_on(sh->cpu, raid5_wq,
+				      &group->workers[i].work);
+			thread_cnt--;
+		}
+	}
 }
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
@@ -673,6 +689,10 @@ get_active_stripe(struct r5conf *conf, s
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (sh->group) {
+					sh->group->stripes_cnt--;
+					sh->group = NULL;
+				}
 			}
 		}
 	} while (sh == NULL);
@@ -4259,15 +4279,18 @@ static struct stripe_head *__get_priorit
 {
 	struct stripe_head *sh = NULL, *tmp;
 	struct list_head *handle_list = NULL;
+	struct r5worker_group *wg = NULL;
 
 	if (conf->worker_cnt_per_group == 0) {
 		handle_list = &conf->handle_list;
 	} else if (group != ANY_GROUP) {
 		handle_list = &conf->worker_groups[group].handle_list;
+		wg = &conf->worker_groups[group];
 	} else {
 		int i;
 		for (i = 0; i < conf->group_cnt; i++) {
 			handle_list = &conf->worker_groups[i].handle_list;
+			wg = &conf->worker_groups[i];
 			if (!list_empty(handle_list))
 				break;
 		}
@@ -4314,11 +4337,16 @@ static struct stripe_head *__get_priorit
 			if (conf->bypass_count < 0)
 				conf->bypass_count = 0;
 		}
+		wg = NULL;
 	}
 
 	if (!sh)
 		return NULL;
 
+	if (wg) {
+		wg->stripes_cnt--;
+		sh->group = NULL;
+	}
 	list_del_init(&sh->lru);
 	atomic_inc(&sh->count);
 	BUG_ON(atomic_read(&sh->count) != 1);
@@ -5025,8 +5053,8 @@ static int  retry_aligned_read(struct r5
 	return handled;
 }
 
-#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf, int group)
+static int handle_active_stripes(struct r5conf *conf, int group,
+		struct r5worker *worker)
 {
 	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
 	int i, batch_size = 0;
@@ -5035,6 +5063,9 @@ static int handle_active_stripes(struct
 			(sh = __get_priority_stripe(conf, group)) != NULL)
 		batch[batch_size++] = sh;
 
+	if (worker)
+		worker->working_cnt = batch_size;
+
 	if (batch_size == 0)
 		return batch_size;
 	spin_unlock_irq(&conf->device_lock);
@@ -5069,7 +5100,8 @@ static void raid5_do_work(struct work_st
 
 		released = release_stripe_list(conf);
 
-		batch_size = handle_active_stripes(conf, group_id);
+		batch_size = handle_active_stripes(conf, group_id, worker);
+		worker->working = false;
 		if (!batch_size && !released)
 			break;
 		handled += batch_size;
@@ -5131,7 +5163,7 @@ static void raid5d(struct md_thread *thr
 			handled++;
 		}
 
-		batch_size = handle_active_stripes(conf, ANY_GROUP);
+		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
 		if (!batch_size && !released)
 			break;
 		handled += batch_size;
Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h	2013-08-28 16:49:51.744491417 +0800
+++ linux/drivers/md/raid5.h	2013-08-28 16:49:51.744491417 +0800
@@ -214,6 +214,7 @@ struct stripe_head {
 	enum reconstruct_states reconstruct_state;
 	spinlock_t		stripe_lock;
 	int			cpu;
+	struct r5worker_group	*group;
 	/**
 	 * struct stripe_operations
 	 * @target - STRIPE_OP_COMPUTE_BLK target
@@ -370,12 +371,15 @@ struct disk_info {
 struct r5worker {
 	struct work_struct work;
 	struct r5worker_group *group;
+	int working_cnt:8;
+	bool working;
 };
 
 struct r5worker_group {
 	struct list_head handle_list;
 	struct r5conf *conf;
 	struct r5worker *workers;
+	int stripes_cnt;
 };
 
 #define NR_STRIPE_HASH_LOCKS 8

  parent reply	other threads:[~2013-08-29  7:40 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-27  9:50 [patch v3 0/5] raid5: make stripe handling multi-threading Shaohua Li
2013-08-27  9:50 ` [patch v3 1/5] raid5: make release_stripe lockless Shaohua Li
2013-08-28 14:04   ` Tejun Heo
2013-08-28 14:29     ` Shaohua Li
2013-08-28 14:30       ` Tejun Heo
2013-08-27  9:50 ` [patch v3 2/5] raid5: fix stripe release order Shaohua Li
2013-08-28  3:41   ` NeilBrown
2013-08-28  6:29     ` Shaohua Li
2013-08-28  6:37       ` NeilBrown
2013-08-27  9:50 ` [patch v3 3/5] raid5: offload stripe handle to workqueue Shaohua Li
2013-08-28  3:53   ` NeilBrown
2013-08-28  6:30     ` Shaohua Li
2013-08-28  6:56       ` NeilBrown
2013-08-27  9:50 ` [patch v3 4/5] raid5: sysfs entry to control worker thread number Shaohua Li
2013-08-27  9:50 ` [patch v3 5/5] raid5: only wakeup necessary threads Shaohua Li
2013-08-28  4:13   ` NeilBrown
2013-08-28  6:31     ` Shaohua Li
2013-08-28  6:59       ` NeilBrown
2013-08-29  7:40     ` Shaohua Li [this message]
2013-09-02  0:45       ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130829074032.GA24012@kernel.org \
    --to=shli@kernel.org \
    --cc=dan.j.williams@gmail.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.