From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org
Cc: Daniel Walker <dwalker@mvista.com>,
Steven Rostedt <rostedt@goodmis.org>, Ingo Molnar <mingo@elte.hu>,
Thomas Gleixner <tglx@linutronix.de>,
Gregory Haskins <ghaskins@novell.com>,
Oleg Nesterov <oleg@tv-sign.ru>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC/PATCH 5/5] rt: PI-workqueue: fix barriers
Date: Tue, 23 Oct 2007 14:04:02 +0200 [thread overview]
Message-ID: <20071023120745.616140000@chello.nl> (raw)
In-Reply-To: 20071023120357.782284000@chello.nl
[-- Attachment #1: rt-workqueue-barrier.patch --]
[-- Type: text/plain, Size: 8091 bytes --]
The plist change to the workqueues left the barrier functionality broken.
The barrier is used for two things:
- wait_on_work(), and
- flush_cpu_workqueue().
wait_on_work() - uses the barrier to wait on the completion of the currently
running worklet. This was done by inserting a completion barrier at the very
head of the worklist. With plist this would be the head of the highest prio.
In order to do that, we extend the priority range to exceed the normal range
and enqueue it higher than anything else. Another noteworthy point is that
this high prio worklet must not boost the prio further than the waiting task's
prio, even though we enqueue it at prio 100.
flush_cpu_workqueue() - is a full ordering barrier, although, as the name
suggests, it is usually used to wait for the worklist to drain. We'll support
the full ordering semantics currently present. This means that:
W10, W22, W65, B, W80, B, W99
[ where Wn is a worklet at prio n, and B a barrier ]
would most likely execute in the following order:
W10@99, W65@99, W22@99, W80@99, W99
[ Wn@m is Wn executed at prio m ]
[ W10 would be first because it can start executing while the others
are being added ]
Whereas without the barriers it would be:
W10@99, W99, W80, W65, W22
The prio ordering of the plist makes it hard to impose an extra order on top.
The solution used is to nest plist structures. The example will look like:
W10, B(B(W65, W22), W80), W99
That is, the barrier will splice the worklist into itself, and enqueue itself
as the next item to run (very first item, highest prio). The barrier will then
run its own plist to completion before 'popping' back to the regular worklist.
To avoid callstack nesting, run_workqueue is taught about this barrier stack.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/workqueue.c | 111 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 95 insertions(+), 16 deletions(-)
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -36,6 +36,8 @@
#include <asm/uaccess.h>
+struct wq_full_barrier;
+
/*
* The per-CPU workqueue (if single thread, we always use the first
* possible cpu).
@@ -52,6 +54,8 @@ struct cpu_workqueue_struct {
struct task_struct *thread;
int run_depth; /* Detect run_workqueue() recursion depth */
+
+ struct wq_full_barrier *barrier;
} ____cacheline_aligned;
/*
@@ -125,10 +129,8 @@ struct cpu_workqueue_struct *get_wq_data
}
static void insert_work(struct cpu_workqueue_struct *cwq,
- struct work_struct *work, int tail)
+ struct work_struct *work, int prio, int boost_prio)
{
- int prio = current->normal_prio;
-
set_wq_data(work, cwq);
/*
* Ensure that we get the right work->data if we see the
@@ -138,8 +140,8 @@ static void insert_work(struct cpu_workq
plist_node_init(&work->entry, prio);
plist_add(&work->entry, &cwq->worklist);
- if (prio < cwq->thread->prio)
- task_setprio(cwq->thread, prio);
+ if (boost_prio < cwq->thread->prio)
+ task_setprio(cwq->thread, boost_prio);
wake_up(&cwq->more_work);
}
@@ -150,7 +152,7 @@ static void __queue_work(struct cpu_work
unsigned long flags;
spin_lock_irqsave(&cwq->lock, flags);
- insert_work(cwq, work, 1);
+ insert_work(cwq, work, current->normal_prio, current->normal_prio);
spin_unlock_irqrestore(&cwq->lock, flags);
}
@@ -257,8 +259,20 @@ static void leak_check(void *func)
dump_stack();
}
+struct wq_full_barrier {
+ struct work_struct work;
+ struct plist_head worklist;
+ struct wq_full_barrier *prev_barrier;
+ int prev_prio;
+ int waiter_prio;
+ struct cpu_workqueue_struct *cwq;
+ struct completion done;
+};
+
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
+ struct plist_head *worklist = &cwq->worklist;
+
spin_lock_irq(&cwq->lock);
cwq->run_depth++;
if (cwq->run_depth > 3) {
@@ -267,16 +281,27 @@ static void run_workqueue(struct cpu_wor
__FUNCTION__, cwq->run_depth);
dump_stack();
}
- while (!plist_head_empty(&cwq->worklist)) {
- struct work_struct *work = plist_first_entry(&cwq->worklist,
+
+again:
+ while (!plist_head_empty(worklist)) {
+ int prio;
+ struct work_struct *work = plist_first_entry(worklist,
struct work_struct, entry);
work_func_t f = work->func;
- if (likely(cwq->thread->prio != work->entry.prio))
- task_setprio(cwq->thread, work->entry.prio);
+ prio = work->entry.prio;
+ if (unlikely(worklist != &cwq->worklist)) {
+ prio = min(prio, cwq->barrier->prev_prio);
+ prio = min(prio, cwq->barrier->waiter_prio);
+ prio = min(prio, plist_first(&cwq->worklist)->prio);
+ }
+ prio = max(prio, 0);
+
+ if (likely(cwq->thread->prio != prio))
+ task_setprio(cwq->thread, prio);
cwq->current_work = work;
- plist_del(&work->entry, &cwq->worklist);
+ plist_del(&work->entry, worklist);
plist_node_init(&work->entry, MAX_PRIO);
spin_unlock_irq(&cwq->lock);
@@ -289,7 +314,27 @@ static void run_workqueue(struct cpu_wor
spin_lock_irq(&cwq->lock);
cwq->current_work = NULL;
+
+ if (unlikely(cwq->barrier))
+ worklist = &cwq->barrier->worklist;
+ }
+
+ if (unlikely(worklist != &cwq->worklist)) {
+ struct wq_full_barrier *barrier = cwq->barrier;
+
+ BUG_ON(!barrier);
+ cwq->barrier = barrier->prev_barrier;
+ complete(&barrier->done);
+
+ if (unlikely(cwq->barrier))
+ worklist = &cwq->barrier->worklist;
+ else
+ worklist = &cwq->worklist;
+
+ if (!plist_head_empty(worklist))
+ goto again;
}
+
task_setprio(cwq->thread, current->normal_prio);
cwq->run_depth--;
spin_unlock_irq(&cwq->lock);
@@ -336,14 +381,47 @@ static void wq_barrier_func(struct work_
}
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
- struct wq_barrier *barr, int tail)
+ struct wq_barrier *barr, int prio)
{
INIT_WORK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
init_completion(&barr->done);
- insert_work(cwq, &barr->work, tail);
+ insert_work(cwq, &barr->work, prio, current->prio);
+}
+
+static void wq_full_barrier_func(struct work_struct *work)
+{
+ struct wq_full_barrier *barrier =
+ container_of(work, struct wq_full_barrier, work);
+ struct cpu_workqueue_struct *cwq = barrier->cwq;
+ int prio = MAX_PRIO;
+
+ spin_lock_irq(&cwq->lock);
+ barrier->prev_barrier = cwq->barrier;
+ if (cwq->barrier) {
+ prio = min(prio, cwq->barrier->waiter_prio);
+ prio = min(prio, plist_first(&cwq->barrier->worklist)->prio);
+ }
+ barrier->prev_prio = prio;
+ cwq->barrier = barrier;
+ spin_unlock_irq(&cwq->lock);
+}
+
+static void insert_wq_full_barrier(struct cpu_workqueue_struct *cwq,
+ struct wq_full_barrier *barr)
+{
+ INIT_WORK(&barr->work, wq_full_barrier_func);
+ __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
+
+ plist_head_init(&barr->worklist, NULL);
+ plist_head_splice(&cwq->worklist, &barr->worklist);
+ barr->cwq = cwq;
+ init_completion(&barr->done);
+ barr->waiter_prio = current->prio;
+
+ insert_work(cwq, &barr->work, 0, current->prio);
}
static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -358,13 +436,13 @@ static int flush_cpu_workqueue(struct cp
run_workqueue(cwq);
active = 1;
} else {
- struct wq_barrier barr;
+ struct wq_full_barrier barr;
active = 0;
spin_lock_irq(&cwq->lock);
if (!plist_head_empty(&cwq->worklist) ||
cwq->current_work != NULL) {
- insert_wq_barrier(cwq, &barr, 1);
+ insert_wq_full_barrier(cwq, &barr);
active = 1;
}
spin_unlock_irq(&cwq->lock);
@@ -448,7 +526,7 @@ static void wait_on_cpu_work(struct cpu_
spin_lock_irq(&cwq->lock);
if (unlikely(cwq->current_work == work)) {
- insert_wq_barrier(cwq, &barr, 0);
+ insert_wq_barrier(cwq, &barr, -1);
running = 1;
}
spin_unlock_irq(&cwq->lock);
@@ -759,6 +837,7 @@ init_cpu_workqueue(struct workqueue_stru
spin_lock_init(&cwq->lock);
plist_head_init(&cwq->worklist, NULL);
init_waitqueue_head(&cwq->more_work);
+ cwq->barrier = NULL;
return cwq;
}
--
next prev parent reply other threads:[~2007-10-23 12:09 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
2007-10-23 12:03 ` [RFC/PATCH 1/5] rt: rename rt_mutex_setprio to task_setprio Peter Zijlstra
2007-10-23 12:03 ` [RFC/PATCH 2/5] rt: list_splice2 Peter Zijlstra
2007-10-23 14:08 ` Steven Rostedt
2007-10-23 12:04 ` [RFC/PATCH 3/5] rt: plist_head_splice Peter Zijlstra
2007-10-23 15:10 ` Steven Rostedt
2007-10-23 16:26 ` Peter Zijlstra
2007-10-23 17:45 ` Peter Zijlstra
2007-10-23 12:04 ` [RFC/PATCH 4/5] rt: PI-workqueue support Peter Zijlstra
2007-10-23 12:04 ` Peter Zijlstra [this message]
2007-10-23 19:22 ` [RFC/PATCH 6/5] rt: PI-workqueue: wait_on_work() fixup Peter Zijlstra
2007-10-23 19:22 ` [RFC/PATCH 7/5] rt: PI-workqueue: propagate prio for delayed work Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071023120745.616140000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=dwalker@mvista.com \
--cc=ghaskins@novell.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=oleg@tv-sign.ru \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox