From: Tejun Heo <tj@kernel.org>
To: laijs@cn.fujitsu.com
Cc: axboe@kernel.dk, jack@suse.cz, fengguang.wu@intel.com,
jmoyer@redhat.com, zab@redhat.com, linux-kernel@vger.kernel.org,
herbert@gondor.apana.org.au, davem@davemloft.net,
linux-crypto@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 08/14] workqueue: map an unbound workqueues to multiple per-node pool_workqueues
Date: Wed, 27 Mar 2013 23:43:34 -0700 [thread overview]
Message-ID: <1364453020-2829-9-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1364453020-2829-1-git-send-email-tj@kernel.org>
Currently, an unbound workqueue has only one "current" pool_workqueue
associated with it. It may have multple pool_workqueues but only the
first pool_workqueue servies new work items. For NUMA affinity, we
want to change this so that there are multiple current pool_workqueues
serving different NUMA nodes.
Introduce workqueue->numa_pwq_tbl[] which is indexed by NUMA node and
points to the pool_workqueue to use for each possible node. This
replaces first_pwq() in __queue_work() and workqueue_congested().
numa_pwq_tbl[] is currently initialized to point to the same
pool_workqueue as first_pwq() so this patch doesn't make any behavior
changes.
v2: Use rcu_dereference_raw() in unbound_pwq_by_node() as the function
may be called only with wq->mutex held.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/workqueue.c | 48 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 37 insertions(+), 11 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9297ea3..5b53705 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -257,6 +257,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
};
static struct kmem_cache *pwq_cache;
@@ -525,6 +526,22 @@ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
pwqs_node);
}
+/**
+ * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
+ * @wq: the target workqueue
+ * @node: the node ID
+ *
+ * This must be called either with pwq_lock held or sched RCU read locked.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
+ */
+static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
+ int node)
+{
+ assert_rcu_or_wq_mutex(wq);
+ return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
+}
+
static unsigned int work_color_to_flags(int color)
{
return color << WORK_STRUCT_COLOR_SHIFT;
@@ -1278,14 +1295,14 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
WARN_ON_ONCE(!is_chained_work(wq)))
return;
retry:
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
+
/* pwq which will be used unless @work is executing elsewhere */
- if (!(wq->flags & WQ_UNBOUND)) {
- if (cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
+ if (!(wq->flags & WQ_UNBOUND))
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
- } else {
- pwq = first_pwq(wq);
- }
+ else
+ pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
/*
* If @work was previously on a different pool, it might still be
@@ -1315,8 +1332,8 @@ retry:
* pwq is determined and locked. For unbound pools, we could have
* raced with pwq release and it could already be dead. If its
* refcnt is zero, repeat pwq selection. Note that pwqs never die
- * without another pwq replacing it as the first pwq or while a
- * work item is executing on it, so the retying is guaranteed to
+ * without another pwq replacing it in the numa_pwq_tbl or while
+ * work items are executing on it, so the retrying is guaranteed to
* make forward-progress.
*/
if (unlikely(!pwq->refcnt)) {
@@ -3614,6 +3631,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
struct worker_pool *pool,
struct pool_workqueue **p_last_pwq)
{
+ int node;
+
BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
pwq->pool = pool;
@@ -3640,8 +3659,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
/* link in @pwq */
list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
- if (wq->flags & WQ_UNBOUND)
+ if (wq->flags & WQ_UNBOUND) {
copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
+ for_each_node(node)
+ rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
+ }
mutex_unlock(&wq->mutex);
}
@@ -3756,12 +3778,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct lock_class_key *key,
const char *lock_name, ...)
{
+ size_t tbl_size = 0;
va_list args;
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
/* allocate wq and format name */
- wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (flags & WQ_UNBOUND)
+ tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+
+ wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
if (!wq)
return NULL;
@@ -3989,7 +4015,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
if (!(wq->flags & WQ_UNBOUND))
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
else
- pwq = first_pwq(wq);
+ pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
ret = !list_empty(&pwq->delayed_works);
rcu_read_unlock_sched();
--
1.8.1.4
next prev parent reply other threads:[~2013-03-28 6:43 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-28 6:43 Subject: [PATCHSET v2 wq/for-3.10] workqueue: NUMA affinity for unbound workqueues Tejun Heo
2013-03-28 6:43 ` [PATCH 01/14] workqueue: move pwq_pool_locking outside of get/put_unbound_pool() Tejun Heo
2013-03-28 6:43 ` [PATCH 02/14] workqueue: add wq_numa_tbl_len and wq_numa_possible_cpumask[] Tejun Heo
2013-03-28 6:43 ` [PATCH 03/14] workqueue: drop 'H' from kworker names of unbound worker pools Tejun Heo
2013-03-28 6:43 ` [PATCH 04/14] workqueue: determine NUMA node of workers accourding to the allowed cpumask Tejun Heo
2013-03-28 6:43 ` [PATCH 05/14] workqueue: add workqueue->unbound_attrs Tejun Heo
2013-03-28 6:43 ` [PATCH 06/14] workqueue: make workqueue->name[] fixed len Tejun Heo
2013-03-28 6:43 ` [PATCH 07/14] workqueue: move hot fields of workqueue_struct to the end Tejun Heo
2013-03-28 6:43 ` Tejun Heo [this message]
2013-03-28 6:43 ` [PATCH 09/14] workqueue: break init_and_link_pwq() into two functions and introduce alloc_unbound_pwq() Tejun Heo
2013-03-28 6:43 ` [PATCH 10/14] workqueue: use NUMA-aware allocation for pool_workqueues Tejun Heo
2013-03-28 6:43 ` [PATCH 11/14] workqueue: introduce numa_pwq_tbl_install() Tejun Heo
2013-03-28 6:43 ` [PATCH 12/14] workqueue: introduce put_pwq_unlocked() Tejun Heo
2013-03-28 6:43 ` [PATCH 13/14] workqueue: implement NUMA affinity for unbound workqueues Tejun Heo
2013-03-29 22:44 ` [PATCH v4 " Tejun Heo
[not found] ` <CACvQF50c3m3eMiGKctagoOe6s3uhehfFy733imBfnLKTXSqZ4A@mail.gmail.com>
[not found] ` <CAOS58YOgwB4s4-2e528T6SV36pDxLS3Zx+b5eR0L2kQjiZBEnw@mail.gmail.com>
2013-03-30 17:23 ` Lai Jiangshan
2013-03-31 19:06 ` Tejun Heo
2013-04-01 18:28 ` [PATCH v5 " Tejun Heo
2013-03-28 6:43 ` [PATCH 14/14] workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to disable NUMA affinity Tejun Heo
2013-04-01 18:27 ` [PATCH 0.5/14] workqueue: fix memory leak in apply_workqueue_attrs() Tejun Heo
2013-04-01 18:29 ` Subject: [PATCHSET v2 wq/for-3.10] workqueue: NUMA affinity for unbound workqueues Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1364453020-2829-9-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=axboe@kernel.dk \
--cc=davem@davemloft.net \
--cc=fengguang.wu@intel.com \
--cc=herbert@gondor.apana.org.au \
--cc=jack@suse.cz \
--cc=jmoyer@redhat.com \
--cc=laijs@cn.fujitsu.com \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=zab@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).