From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, awalls@radix.net,
linux-kernel@vger.kernel.org, jeff@garzik.org, mingo@elte.hu,
akpm@linux-foundation.org, jens.axboe@oracle.com,
rusty@rustcorp.com.au, cl@linux-foundation.org,
dhowells@redhat.com, arjan@linux.intel.com, avi@redhat.com,
peterz@infradead.org, johannes@sipsolutions.net,
andi@firstfloor.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 22/27] workqueue: implement worker states
Date: Fri, 18 Dec 2009 21:58:03 +0900 [thread overview]
Message-ID: <1261141088-2014-23-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1261141088-2014-1-git-send-email-tj@kernel.org>
Implement worker states. After created, a worker is STARTED. While a
worker isn't processing a work, it's IDLE and chained on
gcwq->idle_list. While processing a work, a worker is BUSY and
chained on gcwq->busy_hash. Also, gcwq now counts the number of all
workers and idle ones.
worker_thread() is restructured to reflect state transitions.
cwq->more_work is removed and waking up a worker makes it check for
events. A worker is killed by setting DIE flag while it's IDLE and
waking it up.
This gives gcwq better visibility of what's going on and allows it to
find out whether a work is executing quickly which is necessary to
have multiple workers processing the same cwq.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/workqueue.c | 207 ++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 170 insertions(+), 37 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 91c924c..104f72f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,17 @@
#include <linux/lockdep.h>
#include <linux/idr.h>
+enum {
+ /* worker flags */
+ WORKER_STARTED = 1 << 0, /* started */
+ WORKER_DIE = 1 << 1, /* die die die */
+ WORKER_IDLE = 1 << 2, /* is idle */
+
+ BUSY_WORKER_HASH_ORDER = 5, /* 32 pointers */
+ BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
+ BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
+};
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -51,11 +62,18 @@ struct global_cwq;
struct cpu_workqueue_struct;
struct worker {
+ /* on idle list while idle, on busy hash table while busy */
+ union {
+ struct list_head entry; /* L: while idle */
+ struct hlist_node hentry; /* L: while busy */
+ };
+
struct work_struct *current_work; /* L: work being processed */
struct list_head scheduled; /* L: scheduled works */
struct task_struct *task; /* I: worker task */
struct global_cwq *gcwq; /* I: the associated gcwq */
struct cpu_workqueue_struct *cwq; /* I: the associated cwq */
+ unsigned int flags; /* L: flags */
int id; /* I: worker id */
};
@@ -65,6 +83,15 @@ struct worker {
struct global_cwq {
spinlock_t lock; /* the gcwq lock */
unsigned int cpu; /* I: the associated cpu */
+
+ int nr_workers; /* L: total number of workers */
+ int nr_idle; /* L: currently idle ones */
+
+ /* workers are chained either in the idle_list or busy_hash */
+ struct list_head idle_list; /* L: list of idle workers */
+ struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
+ /* L: hash of busy workers */
+
struct ida worker_ida; /* L: for worker IDs */
} ____cacheline_aligned_in_smp;
@@ -77,7 +104,6 @@ struct global_cwq {
struct cpu_workqueue_struct {
struct global_cwq *gcwq; /* I: the associated gcwq */
struct list_head worklist;
- wait_queue_head_t more_work;
struct worker *worker;
struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */
@@ -300,6 +326,33 @@ static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
}
/**
+ * busy_worker_head - return the busy hash head for a work
+ * @gcwq: gcwq of interest
+ * @work: work to be hashed
+ *
+ * Return hash head of @gcwq for @work.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ *
+ * RETURNS:
+ * Pointer to the hash head.
+ */
+static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
+ struct work_struct *work)
+{
+ const int base_shift = ilog2(sizeof(struct work_struct));
+ unsigned long v = (unsigned long)work;
+
+ /* simple shift and fold hash, do we need something better? */
+ v >>= base_shift;
+ v += v >> BUSY_WORKER_HASH_ORDER;
+ v &= BUSY_WORKER_HASH_MASK;
+
+ return &gcwq->busy_hash[v];
+}
+
+/**
* insert_work - insert a work into cwq
* @cwq: cwq @work belongs to
* @work: work to insert
@@ -325,7 +378,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
smp_wmb();
list_add_tail(&work->entry, head);
- wake_up(&cwq->more_work);
+ wake_up_process(cwq->worker->task);
}
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
@@ -463,13 +516,59 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+/**
+ * worker_enter_idle - enter idle state
+ * @worker: worker which is entering idle state
+ *
+ * @worker is entering idle state. Update stats and idle timer if
+ * necessary.
+ *
+ * LOCKING:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void worker_enter_idle(struct worker *worker)
+{
+ struct global_cwq *gcwq = worker->gcwq;
+
+ BUG_ON(worker->flags & WORKER_IDLE);
+ BUG_ON(!list_empty(&worker->entry) &&
+ (worker->hentry.next || worker->hentry.pprev));
+
+ worker->flags |= WORKER_IDLE;
+ gcwq->nr_idle++;
+
+ /* idle_list is LIFO */
+ list_add(&worker->entry, &gcwq->idle_list);
+}
+
+/**
+ * worker_leave_idle - leave idle state
+ * @worker: worker which is leaving idle state
+ *
+ * @worker is leaving idle state. Update stats.
+ *
+ * LOCKING:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void worker_leave_idle(struct worker *worker)
+{
+ struct global_cwq *gcwq = worker->gcwq;
+
+ BUG_ON(!(worker->flags & WORKER_IDLE));
+ worker->flags &= ~WORKER_IDLE;
+ gcwq->nr_idle--;
+ list_del_init(&worker->entry);
+}
+
static struct worker *alloc_worker(void)
{
struct worker *worker;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
- if (worker)
+ if (worker) {
+ INIT_LIST_HEAD(&worker->entry);
INIT_LIST_HEAD(&worker->scheduled);
+ }
return worker;
}
@@ -534,13 +633,16 @@ fail:
* start_worker - start a newly created worker
* @worker: worker to start
*
- * Start @worker.
+ * Make the gcwq aware of @worker and start it.
*
* CONTEXT:
* spin_lock_irq(gcwq->lock).
*/
static void start_worker(struct worker *worker)
{
+ worker->flags |= WORKER_STARTED;
+ worker->gcwq->nr_workers++;
+ worker_enter_idle(worker);
wake_up_process(worker->task);
}
@@ -548,7 +650,10 @@ static void start_worker(struct worker *worker)
* destroy_worker - destroy a workqueue worker
* @worker: worker to be destroyed
*
- * Destroy @worker.
+ * Destroy @worker and adjust @gcwq stats accordingly.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which is released and regrabbed.
*/
static void destroy_worker(struct worker *worker)
{
@@ -559,12 +664,21 @@ static void destroy_worker(struct worker *worker)
BUG_ON(worker->current_work);
BUG_ON(!list_empty(&worker->scheduled));
+ if (worker->flags & WORKER_STARTED)
+ gcwq->nr_workers--;
+ if (worker->flags & WORKER_IDLE)
+ gcwq->nr_idle--;
+
+ list_del_init(&worker->entry);
+ worker->flags |= WORKER_DIE;
+
+ spin_unlock_irq(&gcwq->lock);
+
kthread_stop(worker->task);
kfree(worker);
spin_lock_irq(&gcwq->lock);
ida_remove(&gcwq->worker_ida, id);
- spin_unlock_irq(&gcwq->lock);
}
/**
@@ -680,6 +794,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
{
struct cpu_workqueue_struct *cwq = worker->cwq;
struct global_cwq *gcwq = cwq->gcwq;
+ struct hlist_head *bwh = busy_worker_head(gcwq, work);
work_func_t f = work->func;
int work_color;
#ifdef CONFIG_LOCKDEP
@@ -694,6 +809,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
#endif
/* claim and process */
debug_work_deactivate(work);
+ hlist_add_head(&worker->hentry, bwh);
worker->current_work = work;
work_color = work_flags_to_color(*work_data_bits(work));
list_del_init(&work->entry);
@@ -721,6 +837,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
spin_lock_irq(&gcwq->lock);
/* we're done with it, release */
+ hlist_del_init(&worker->hentry);
worker->current_work = NULL;
cwq_dec_nr_in_flight(cwq, work_color);
}
@@ -757,42 +874,52 @@ static int worker_thread(void *__worker)
struct worker *worker = __worker;
struct global_cwq *gcwq = worker->gcwq;
struct cpu_workqueue_struct *cwq = worker->cwq;
- DEFINE_WAIT(wait);
- for (;;) {
- prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
- if (!kthread_should_stop() &&
- list_empty(&cwq->worklist))
- schedule();
- finish_wait(&cwq->more_work, &wait);
+woke_up:
+ spin_lock_irq(&gcwq->lock);
- if (kthread_should_stop())
- break;
+ /* DIE can be set only while we're idle, checking here is enough */
+ if (worker->flags & WORKER_DIE) {
+ spin_unlock_irq(&gcwq->lock);
+ return 0;
+ }
- spin_lock_irq(&gcwq->lock);
+ worker_leave_idle(worker);
- while (!list_empty(&cwq->worklist)) {
- struct work_struct *work =
- list_first_entry(&cwq->worklist,
- struct work_struct, entry);
-
- if (likely(!(*work_data_bits(work) &
- WORK_STRUCT_LINKED))) {
- /* optimization path, not strictly necessary */
- process_one_work(worker, work);
- if (unlikely(!list_empty(&worker->scheduled)))
- process_scheduled_works(worker);
- } else {
- move_linked_works(work, &worker->scheduled,
- NULL);
+ /*
+ * ->scheduled list can only be filled while a worker is
+ * preparing to process a work or actually processing it.
+ * Make sure nobody diddled with it while I was sleeping.
+ */
+ BUG_ON(!list_empty(&worker->scheduled));
+
+ while (!list_empty(&cwq->worklist)) {
+ struct work_struct *work =
+ list_first_entry(&cwq->worklist,
+ struct work_struct, entry);
+
+ if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
+ /* optimization path, not strictly necessary */
+ process_one_work(worker, work);
+ if (unlikely(!list_empty(&worker->scheduled)))
process_scheduled_works(worker);
- }
+ } else {
+ move_linked_works(work, &worker->scheduled, NULL);
+ process_scheduled_works(worker);
}
-
- spin_unlock_irq(&gcwq->lock);
}
- return 0;
+ /*
+ * gcwq->lock is held and there's no work to process, sleep.
+ * Workers are woken up only while holding gcwq->lock, so
+ * setting the current state before releasing gcwq->lock is
+ * enough to prevent losing any event.
+ */
+ worker_enter_idle(worker);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_irq(&gcwq->lock);
+ schedule();
+ goto woke_up;
}
struct wq_barrier {
@@ -1551,7 +1678,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
cwq->max_active = max_active;
INIT_LIST_HEAD(&cwq->worklist);
INIT_LIST_HEAD(&cwq->delayed_works);
- init_waitqueue_head(&cwq->more_work);
if (failed)
continue;
@@ -1602,7 +1728,7 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
*/
void destroy_workqueue(struct workqueue_struct *wq)
{
- int cpu;
+ unsigned int cpu;
flush_workqueue(wq);
@@ -1619,8 +1745,10 @@ void destroy_workqueue(struct workqueue_struct *wq)
int i;
if (cwq->worker) {
+ spin_lock_irq(&cwq->gcwq->lock);
destroy_worker(cwq->worker);
cwq->worker = NULL;
+ spin_unlock_irq(&cwq->gcwq->lock);
}
for (i = 0; i < WORK_NR_COLORS; i++)
@@ -1835,7 +1963,7 @@ void thaw_workqueues(void)
cwq->nr_active < cwq->max_active)
cwq_activate_first_delayed(cwq);
- wake_up(&cwq->more_work);
+ wake_up_process(cwq->worker->task);
}
spin_unlock_irq(&gcwq->lock);
@@ -1850,6 +1978,7 @@ out_unlock:
void __init init_workqueues(void)
{
unsigned int cpu;
+ int i;
/*
* cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
@@ -1869,6 +1998,10 @@ void __init init_workqueues(void)
spin_lock_init(&gcwq->lock);
gcwq->cpu = cpu;
+ INIT_LIST_HEAD(&gcwq->idle_list);
+ for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
+
ida_init(&gcwq->worker_ida);
}
--
1.6.4.2
next prev parent reply other threads:[~2009-12-18 12:59 UTC|newest]
Thread overview: 104+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-18 12:57 Tejun Heo
2009-12-18 12:57 ` [PATCH 01/27] sched: rename preempt_notifiers to sched_notifiers and refactor implementation Tejun Heo
2009-12-18 12:57 ` [PATCH 02/27] sched: refactor try_to_wake_up() Tejun Heo
2009-12-18 12:57 ` [PATCH 03/27] sched: implement __set_cpus_allowed() Tejun Heo
2009-12-18 12:57 ` [PATCH 04/27] sched: make sched_notifiers unconditional Tejun Heo
2009-12-18 12:57 ` [PATCH 05/27] sched: add wakeup/sleep sched_notifiers and allow NULL notifier ops Tejun Heo
2009-12-18 12:57 ` [PATCH 06/27] sched: implement try_to_wake_up_local() Tejun Heo
2009-12-18 12:57 ` [PATCH 07/27] acpi: use queue_work_on() instead of binding workqueue worker to cpu0 Tejun Heo
2009-12-18 12:57 ` [PATCH 08/27] stop_machine: reimplement without using workqueue Tejun Heo
2009-12-18 12:57 ` [PATCH 09/27] workqueue: misc/cosmetic updates Tejun Heo
2009-12-18 12:57 ` [PATCH 10/27] workqueue: merge feature parameters into flags Tejun Heo
2009-12-18 12:57 ` [PATCH 11/27] workqueue: define both bit position and mask for work flags Tejun Heo
2009-12-18 12:57 ` [PATCH 12/27] workqueue: separate out process_one_work() Tejun Heo
2009-12-18 12:57 ` [PATCH 13/27] workqueue: temporarily disable workqueue tracing Tejun Heo
2009-12-18 12:57 ` [PATCH 14/27] workqueue: kill cpu_populated_map Tejun Heo
2009-12-18 12:57 ` [PATCH 15/27] workqueue: update cwq alignement Tejun Heo
2009-12-18 12:57 ` [PATCH 16/27] workqueue: reimplement workqueue flushing using color coded works Tejun Heo
2009-12-18 12:57 ` [PATCH 17/27] workqueue: introduce worker Tejun Heo
2009-12-18 12:57 ` [PATCH 18/27] workqueue: reimplement work flushing using linked works Tejun Heo
2009-12-18 12:58 ` [PATCH 19/27] workqueue: implement per-cwq active work limit Tejun Heo
2009-12-18 12:58 ` [PATCH 20/27] workqueue: reimplement workqueue freeze using max_active Tejun Heo
2009-12-18 12:58 ` [PATCH 21/27] workqueue: introduce global cwq and unify cwq locks Tejun Heo
2009-12-18 12:58 ` Tejun Heo [this message]
2009-12-18 12:58 ` [PATCH 23/27] workqueue: reimplement CPU hotplugging support using trustee Tejun Heo
2009-12-18 12:58 ` [PATCH 24/27] workqueue: make single thread workqueue shared worker pool friendly Tejun Heo
2009-12-18 12:58 ` [PATCH 25/27] workqueue: use shared worklist and pool all workers per cpu Tejun Heo
2009-12-18 12:58 ` [PATCH 26/27] workqueue: implement concurrency managed dynamic worker pool Tejun Heo
2009-12-18 12:58 ` [PATCH 27/27] workqueue: increase max_active of keventd and kill current_is_keventd() Tejun Heo
2009-12-18 13:00 ` SUBJ: [RFC PATCHSET] concurrency managed workqueue, take#2 Tejun Heo
2009-12-18 13:03 ` Tejun Heo
2009-12-18 13:45 ` workqueue thing Peter Zijlstra
2009-12-18 13:50 ` Andi Kleen
2009-12-18 15:01 ` Arjan van de Ven
2009-12-21 3:19 ` Tejun Heo
2009-12-21 9:17 ` Jens Axboe
2009-12-21 10:35 ` Peter Zijlstra
2009-12-21 11:09 ` Andi Kleen
2009-12-21 11:17 ` Arjan van de Ven
2009-12-21 11:33 ` Andi Kleen
2009-12-21 13:18 ` Tejun Heo
2009-12-21 11:11 ` Arjan van de Ven
2009-12-21 13:22 ` Tejun Heo
2009-12-21 13:53 ` Arjan van de Ven
2009-12-21 14:19 ` Tejun Heo
2009-12-21 15:19 ` Arjan van de Ven
2009-12-22 0:00 ` Tejun Heo
2009-12-22 11:10 ` Peter Zijlstra
2009-12-22 17:20 ` Linus Torvalds
2009-12-22 17:47 ` Peter Zijlstra
2009-12-22 18:07 ` Andi Kleen
2009-12-22 18:20 ` Peter Zijlstra
2009-12-23 8:17 ` Stijn Devriendt
2009-12-23 8:43 ` Peter Zijlstra
2009-12-23 9:01 ` Stijn Devriendt
2009-12-22 18:28 ` Linus Torvalds
2009-12-23 8:06 ` Johannes Berg
2009-12-23 3:37 ` Tejun Heo
2009-12-23 6:52 ` Herbert Xu
2009-12-23 8:00 ` Steffen Klassert
2009-12-23 8:01 ` [PATCH 0/2] Parallel crypto/IPsec v7 Steffen Klassert
2009-12-23 8:03 ` [PATCH 1/2] padata: generic parallelization/serialization interface Steffen Klassert
2009-12-23 8:04 ` [PATCH 2/2] crypto: pcrypt - Add pcrypt crypto parallelization wrapper Steffen Klassert
2010-01-07 5:39 ` [PATCH 0/2] Parallel crypto/IPsec v7 Herbert Xu
2010-01-16 9:44 ` David Miller
2009-12-18 15:30 ` workqueue thing Linus Torvalds
2009-12-18 15:39 ` Ingo Molnar
2009-12-18 15:39 ` Peter Zijlstra
2009-12-18 15:47 ` Linus Torvalds
2009-12-18 15:53 ` Peter Zijlstra
2009-12-21 3:04 ` Tejun Heo
2009-12-21 9:22 ` Peter Zijlstra
2009-12-21 13:30 ` Tejun Heo
2009-12-21 14:26 ` Peter Zijlstra
2009-12-21 23:50 ` Tejun Heo
2009-12-22 11:00 ` Peter Zijlstra
2009-12-22 11:03 ` Peter Zijlstra
2009-12-23 3:43 ` Tejun Heo
2009-12-22 11:04 ` Peter Zijlstra
2009-12-23 3:48 ` Tejun Heo
2009-12-22 11:06 ` Peter Zijlstra
2009-12-23 4:18 ` Tejun Heo
2009-12-23 4:42 ` Linus Torvalds
2009-12-23 6:02 ` Ingo Molnar
2009-12-23 6:13 ` Jeff Garzik
2009-12-23 7:53 ` Ingo Molnar
2009-12-23 8:41 ` Peter Zijlstra
2009-12-23 10:25 ` Jeff Garzik
2009-12-23 13:33 ` Stefan Richter
2009-12-23 14:20 ` Mark Brown
2009-12-23 7:09 ` Tejun Heo
2009-12-23 8:01 ` Ingo Molnar
2009-12-23 8:12 ` Ingo Molnar
2009-12-23 8:32 ` Tejun Heo
2009-12-23 8:42 ` Ingo Molnar
2009-12-23 8:27 ` Tejun Heo
2009-12-23 8:37 ` Ingo Molnar
2009-12-23 8:49 ` Tejun Heo
2009-12-23 8:49 ` Ingo Molnar
2009-12-23 9:03 ` Tejun Heo
2009-12-23 13:40 ` Stefan Richter
2009-12-23 13:43 ` Stefan Richter
2009-12-23 8:25 ` Arjan van de Ven
2009-12-23 13:00 ` Stefan Richter
2009-12-23 8:31 ` Stijn Devriendt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1261141088-2014-23-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=arjan@linux.intel.com \
--cc=avi@redhat.com \
--cc=awalls@radix.net \
--cc=cl@linux-foundation.org \
--cc=dhowells@redhat.com \
--cc=jeff@garzik.org \
--cc=jens.axboe@oracle.com \
--cc=johannes@sipsolutions.net \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=rusty@rustcorp.com.au \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.