* [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 [PATCH 0/3] Introduce simple wait queues Paul Gortmaker
@ 2013-12-12 1:06 ` Paul Gortmaker
2013-12-12 1:58 ` Steven Rostedt
` (4 more replies)
2013-12-12 1:06 ` [PATCH 2/3] sched/core: convert completions to use simple wait queues Paul Gortmaker
2013-12-12 1:06 ` [PATCH 3/3] rcu: use simple wait queues where possible in rcutree Paul Gortmaker
2 siblings, 5 replies; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 1:06 UTC (permalink / raw)
To: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Paul E. McKenney
Cc: Andrew Morton, linux-kernel, Paul Gortmaker
From: Thomas Gleixner <tglx@linutronix.de>
The wait_queue is a swiss army knife and in most of the cases the
full complexity is not needed. Here we provide a slim version, as
it lowers memory consumption and runtime overhead.
The concept originated from RT, where waitqueues are a constant
source of trouble, as we can't convert the head lock to a raw
spinlock due to fancy and long lasting callbacks.
The smp_mb() was added (by Steven Rostedt) to fix a race condition
with swait wakeups vs. adding items to the list.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
[PG: carry forward from multiple v3.10-rt patches to mainline, align
function names with "normal" wait queue names, update commit log.]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
include/linux/swait.h | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/Makefile | 2 +-
kernel/swait.c | 118 +++++++++++++++++++++++++++
3 files changed, 339 insertions(+), 1 deletion(-)
create mode 100644 include/linux/swait.h
create mode 100644 kernel/swait.c
diff --git a/include/linux/swait.h b/include/linux/swait.h
new file mode 100644
index 0000000..8cd49b1
--- /dev/null
+++ b/include/linux/swait.h
@@ -0,0 +1,220 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <asm/current.h>
+
+struct swaiter {
+ struct task_struct *task;
+ struct list_head node;
+};
+
+#define DEFINE_SWAITER(name) \
+ struct swaiter name = { \
+ .task = current, \
+ .node = LIST_HEAD_INIT((name).node), \
+ }
+
+struct swait_queue_head {
+ raw_spinlock_t lock;
+ struct list_head task_list;
+};
+
+typedef struct swait_queue_head swait_queue_head_t;
+
+
+#define SWAIT_QUEUE_HEAD_INITIALIZER(name) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .task_list = LIST_HEAD_INIT((name).task_list), \
+ }
+
+#define DEFINE_SWAIT_HEAD(name) \
+ struct swait_queue_head name = SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+extern void __init_swaitqueue_head(struct swait_queue_head *h,
+ struct lock_class_key *key);
+
+#define init_swaitqueue_head(swh) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __init_swaitqueue_head((swh), &__key); \
+ } while (0)
+
+/*
+ * Waiter functions
+ */
+extern void swait_prepare_locked(struct swait_queue_head *head,
+ struct swaiter *w);
+extern void swait_prepare(struct swait_queue_head *head, struct swaiter *w,
+ int state);
+extern void swait_finish_locked(struct swait_queue_head *head,
+ struct swaiter *w);
+extern void swait_finish(struct swait_queue_head *head, struct swaiter *w);
+
+/* Check whether a head has waiters enqueued */
+static inline bool swaitqueue_active(struct swait_queue_head *h)
+{
+ /* Make sure the condition is visible before checking list_empty() */
+ smp_mb();
+ return !list_empty(&h->task_list);
+}
+
+/*
+ * Wakeup functions
+ */
+extern unsigned int __swake_up(struct swait_queue_head *head,
+ unsigned int state, unsigned int num);
+extern unsigned int __swake_up_locked(struct swait_queue_head *head,
+ unsigned int state, unsigned int num);
+
+#define swake_up(head) \
+ __swake_up(head, TASK_NORMAL, 1)
+#define swake_up_interruptible(head) \
+ __swake_up(head, TASK_INTERRUPTIBLE, 1)
+#define swake_up_all(head) \
+ __swake_up(head, TASK_NORMAL, 0)
+#define swake_up_all_interruptible(head) \
+ __swake_up(head, TASK_INTERRUPTIBLE, 0)
+
+/*
+ * Event API
+ */
+#define __swait_event(wq, condition) \
+do { \
+ DEFINE_SWAITER(__wait); \
+ \
+ for (;;) { \
+ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ schedule(); \
+ } \
+ swait_finish(&wq, &__wait); \
+} while (0)
+
+/**
+ * swait_event - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * swake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define swait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __swait_event(wq, condition); \
+} while (0)
+
+#define __swait_event_interruptible(wq, condition, ret) \
+do { \
+ DEFINE_SWAITER(__wait); \
+ \
+ for (;;) { \
+ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
+ schedule(); \
+ } \
+ swait_finish(&wq, &__wait); \
+} while (0)
+
+#define __swait_event_interruptible_timeout(wq, condition, ret) \
+do { \
+ DEFINE_SWAITER(__wait); \
+ \
+ for (;;) { \
+ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
+ ret = schedule_timeout(ret); \
+ if (!ret) \
+ break; \
+ } \
+ swait_finish(&wq, &__wait); \
+} while (0)
+
+/**
+ * swait_event_interruptible - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * swake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define swait_event_interruptible(wq, condition) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __swait_event_interruptible(wq, condition, __ret); \
+ __ret; \
+})
+
+#define swait_event_interruptible_timeout(wq, condition, timeout) \
+({ \
+ int __ret = timeout; \
+ if (!(condition)) \
+ __swait_event_interruptible_timeout(wq, condition, __ret);\
+ __ret; \
+})
+
+#define __swait_event_timeout(wq, condition, ret) \
+do { \
+ DEFINE_SWAITER(__wait); \
+ \
+ for (;;) { \
+ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ ret = schedule_timeout(ret); \
+ if (!ret) \
+ break; \
+ } \
+ swait_finish(&wq, &__wait); \
+} while (0)
+
+/**
+ * swait_event_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * swake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if the @timeout elapsed, and the remaining
+ * jiffies if the condition evaluated to true before the timeout elapsed.
+ */
+#define swait_event_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ if (!(condition)) \
+ __swait_event_timeout(wq, condition, __ret); \
+ __ret; \
+})
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index bbaf7d5..94b0e34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \
kthread.o sys_ni.o posix-cpu-timers.o \
hrtimer.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o groups.o smpboot.o
+ async.o range.o groups.o smpboot.o swait.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
diff --git a/kernel/swait.c b/kernel/swait.c
new file mode 100644
index 0000000..c798c46
--- /dev/null
+++ b/kernel/swait.c
@@ -0,0 +1,118 @@
+/*
+ * Simple waitqueues without fancy flags and callbacks
+ *
+ * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Based on kernel/wait.c
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/swait.h>
+
+/* Adds w to head->task_list. Must be called with head->lock locked. */
+static inline void __swait_enqueue(struct swait_queue_head *head,
+ struct swaiter *w)
+{
+ list_add(&w->node, &head->task_list);
+ /* We can't let the condition leak before the setting of head */
+ smp_mb();
+}
+
+/* Removes w from head->task_list. Must be called with head->lock locked. */
+static inline void __swait_dequeue(struct swaiter *w)
+{
+ list_del_init(&w->node);
+}
+
+void __init_swaitqueue_head(struct swait_queue_head *head,
+ struct lock_class_key *key)
+{
+ raw_spin_lock_init(&head->lock);
+ lockdep_set_class(&head->lock, key);
+ INIT_LIST_HEAD(&head->task_list);
+}
+EXPORT_SYMBOL(__init_swaitqueue_head);
+
+void swait_prepare_locked(struct swait_queue_head *head, struct swaiter *w)
+{
+ w->task = current;
+ if (list_empty(&w->node))
+ __swait_enqueue(head, w);
+}
+
+void swait_prepare(struct swait_queue_head *head, struct swaiter *w, int state)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&head->lock, flags);
+ swait_prepare_locked(head, w);
+ __set_current_state(state);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
+}
+EXPORT_SYMBOL(swait_prepare);
+
+void swait_finish_locked(struct swait_queue_head *head, struct swaiter *w)
+{
+ __set_current_state(TASK_RUNNING);
+ if (w->task)
+ __swait_dequeue(w);
+}
+
+void swait_finish(struct swait_queue_head *head, struct swaiter *w)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ if (w->task) {
+ raw_spin_lock_irqsave(&head->lock, flags);
+ __swait_dequeue(w);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
+ }
+}
+EXPORT_SYMBOL(swait_finish);
+
+unsigned int
+__swake_up_locked(struct swait_queue_head *head, unsigned int state,
+ unsigned int num)
+{
+ struct swaiter *curr, *next;
+ int woken = 0;
+
+ list_for_each_entry_safe(curr, next, &head->task_list, node) {
+ if (wake_up_state(curr->task, state)) {
+ __swait_dequeue(curr);
+ /*
+ * The waiting task can free the waiter as
+ * soon as curr->task = NULL is written,
+ * without taking any locks. A memory barrier
+ * is required here to prevent the following
+ * store to curr->task from getting ahead of
+ * the dequeue operation.
+ */
+ smp_wmb();
+ curr->task = NULL;
+ if (++woken == num)
+ break;
+ }
+ }
+ return woken;
+}
+
+unsigned int
+__swake_up(struct swait_queue_head *head, unsigned int state, unsigned int num)
+{
+ unsigned long flags;
+ int woken;
+
+ if (!swaitqueue_active(head))
+ return 0;
+
+ raw_spin_lock_irqsave(&head->lock, flags);
+ woken = __swake_up_locked(head, state, num);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
+ return woken;
+}
+EXPORT_SYMBOL(__swake_up);
--
1.8.5.1
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
@ 2013-12-12 1:58 ` Steven Rostedt
2013-12-12 2:09 ` Steven Rostedt
` (3 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 1:58 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Wed, 11 Dec 2013 20:06:37 -0500
Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
>
> The wait_queue is a swiss army knife and in most of the cases the
> full complexity is not needed. Here we provide a slim version, as
> it lowers memory consumption and runtime overhead.
>
> The concept originated from RT, where waitqueues are a constant
> source of trouble, as we can't convert the head lock to a raw
> spinlock due to fancy and long lasting callbacks.
>
> The smp_mb() was added (by Steven Rostedt) to fix a race condition
> with swait wakeups vs. adding items to the list.
For this part, you can also add my:
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
I'll also look at these and test them a bit against mainline.
Thanks for doing this!
-- Steve
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> [PG: carry forward from multiple v3.10-rt patches to mainline, align
> function names with "normal" wait queue names, update commit log.]
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
2013-12-12 1:58 ` Steven Rostedt
@ 2013-12-12 2:09 ` Steven Rostedt
2013-12-12 8:03 ` Christoph Hellwig
` (2 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 2:09 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
> --- /dev/null
> +++ b/kernel/swait.c
> @@ -0,0 +1,118 @@
> +/*
> + * Simple waitqueues without fancy flags and callbacks
We should probably have a more detailed description of when to use
simple wait queues verses normal wait queues. These are obviously much
lighter wait, and should be the preferred method unless you need a
feature of the more heavy weight wait queues.
-- Steve
"weight wait" Ha! Don't get to use that very often ;-)
> + *
> + * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
> + *
> + * Based on kernel/wait.c
> + *
> + * For licencing details see kernel-base/COPYING
> + */
> +#include <linux/init.h>
> +#include <linux/export.h>
> +#include <linux/sched.h>
> +#include <linux/swait.h>
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
2013-12-12 1:58 ` Steven Rostedt
2013-12-12 2:09 ` Steven Rostedt
@ 2013-12-12 8:03 ` Christoph Hellwig
2013-12-12 10:56 ` Thomas Gleixner
2013-12-12 11:18 ` Peter Zijlstra
2013-12-12 11:44 ` Peter Zijlstra
4 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2013-12-12 8:03 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Paul E. McKenney,
Andrew Morton, linux-kernel
On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
>
> The wait_queue is a swiss army knife and in most of the cases the
> full complexity is not needed. Here we provide a slim version, as
> it lowers memory consumption and runtime overhead.
Might it make more sense to just make the simple one the default and use
the complex one in the few cases that need it?
It would also be good to enumerate those cases. The wake callbacks come
to mind, but I guess there are more?
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 8:03 ` Christoph Hellwig
@ 2013-12-12 10:56 ` Thomas Gleixner
2013-12-12 14:48 ` Paul Gortmaker
0 siblings, 1 reply; 24+ messages in thread
From: Thomas Gleixner @ 2013-12-12 10:56 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Paul Gortmaker, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Paul E. McKenney,
Andrew Morton, linux-kernel
On Thu, 12 Dec 2013, Christoph Hellwig wrote:
> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> > From: Thomas Gleixner <tglx@linutronix.de>
> >
> > The wait_queue is a swiss army knife and in most of the cases the
> > full complexity is not needed. Here we provide a slim version, as
> > it lowers memory consumption and runtime overhead.
>
> Might it make more sense to just make the simple one the default and use
> the complex one in the few cases that need it?
Sure.
> It would also be good to enumerate those cases. The wake callbacks come
> to mind, but I guess there are more?
You can convert everything which uses default_wake_function and does
not use the exclusive wait trickery.
Thanks,
tglx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 10:56 ` Thomas Gleixner
@ 2013-12-12 14:48 ` Paul Gortmaker
2013-12-12 14:55 ` Steven Rostedt
0 siblings, 1 reply; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 14:48 UTC (permalink / raw)
To: Thomas Gleixner, Christoph Hellwig
Cc: Peter Zijlstra, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On 13-12-12 05:56 AM, Thomas Gleixner wrote:
> On Thu, 12 Dec 2013, Christoph Hellwig wrote:
>
>> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
>>> From: Thomas Gleixner <tglx@linutronix.de>
>>>
>>> The wait_queue is a swiss army knife and in most of the cases the
>>> full complexity is not needed. Here we provide a slim version, as
>>> it lowers memory consumption and runtime overhead.
>>
>> Might it make more sense to just make the simple one the default and use
>> the complex one in the few cases that need it?
>
> Sure.
The one major downside of doing it that way, is that it becomes
a flag day event instead of a gradual roll out. Any unexpected
fallout will bisect to the one commit, which kind of would suck,
if we happen to trigger something kooky like the 50w mwait issue,
or break the usb stack somehow.
If we do the flag day type change, where complex wait is what we
have currently, and simple wait is the default, then I guess the
logistics would be something like:
1) git mv wait.[ch] --- > cwait.[ch]
make an empty wait.h include cwait.h temporarily
2) rename all existing functions in cwait.[ch] to have an added
"c" prefix (or similar)
in wait.h, temporarily add a bunch of things like
#define wait_xyz cwait_xyz
so that things still compile and link.
3) track down the users who really need the extra complexity
and have them use cwait calls and include cwait.h
4) bring in the simple wait queue support as wait.c and wait.h
and delete the defines added in step two. This will be the
flag day commit.
Is that what we want to do here?
Paul.
--
>
>> It would also be good to enumerate those cases. The wake callbacks come
>> to mind, but I guess there are more?
>
> You can convert everything which uses default_wake_function and does
> not use the exclusive wait trickery.
>
> Thanks,
>
> tglx
>
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 14:48 ` Paul Gortmaker
@ 2013-12-12 14:55 ` Steven Rostedt
2013-12-12 15:25 ` Thomas Gleixner
2013-12-12 15:30 ` Paul Gortmaker
0 siblings, 2 replies; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 14:55 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Christoph Hellwig, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013 09:48:06 -0500
Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
> 1) git mv wait.[ch] --- > cwait.[ch]
> make an empty wait.h include cwait.h temporarily
>
> 2) rename all existing functions in cwait.[ch] to have an added
> "c" prefix (or similar)
>
> in wait.h, temporarily add a bunch of things like
> #define wait_xyz cwait_xyz
> so that things still compile and link.
What about instead change all users of wait_*() to cwait_*().
Then the next steps would be to skip 3 and jump right to 4)
>
> 3) track down the users who really need the extra complexity
> and have them use cwait calls and include cwait.h
>
> 4) bring in the simple wait queue support as wait.c and wait.h
> and delete the defines added in step two. This will be the
> flag day commit.
Not a flag day commit, as no one is using it. Then start converting all
users back to the wait_*() functions one at a time. If something
breaks, we know which one it was.
-- Steve
>
> Is that what we want to do here?
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 14:55 ` Steven Rostedt
@ 2013-12-12 15:25 ` Thomas Gleixner
2013-12-12 15:44 ` Steven Rostedt
2013-12-12 15:30 ` Paul Gortmaker
1 sibling, 1 reply; 24+ messages in thread
From: Thomas Gleixner @ 2013-12-12 15:25 UTC (permalink / raw)
To: Steven Rostedt
Cc: Paul Gortmaker, Christoph Hellwig, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013, Steven Rostedt wrote:
> On Thu, 12 Dec 2013 09:48:06 -0500
> Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> > 1) git mv wait.[ch] --- > cwait.[ch]
> > make an empty wait.h include cwait.h temporarily
> >
> > 2) rename all existing functions in cwait.[ch] to have an added
> > "c" prefix (or similar)
> >
> > in wait.h, temporarily add a bunch of things like
> > #define wait_xyz cwait_xyz
> > so that things still compile and link.
>
> What about instead change all users of wait_*() to cwait_*().
>
> Then the next steps would be to skip 3 and jump right to 4)
>
> >
> > 3) track down the users who really need the extra complexity
> > and have them use cwait calls and include cwait.h
> >
> > 4) bring in the simple wait queue support as wait.c and wait.h
> > and delete the defines added in step two. This will be the
> > flag day commit.
>
> Not a flag day commit, as no one is using it. Then start converting all
> users back to the wait_*() functions one at a time. If something
> breaks, we know which one it was.
I don't think its a good idea to flip the name spaces.
We should rather convert the existing stuff over to cwait or whatever
and keep the swait for the new facility. So it breaks everything which
is trying to use wait*
- out of tree code
- students copying from ldd3
- ...
[ not that I care much about any of that ]
in a very obvious way.
Thanks,
tglx
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 15:25 ` Thomas Gleixner
@ 2013-12-12 15:44 ` Steven Rostedt
0 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 15:44 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Paul Gortmaker, Christoph Hellwig, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013 16:25:42 +0100 (CET)
Thomas Gleixner <tglx@linutronix.de> wrote:
> On Thu, 12 Dec 2013, Steven Rostedt wrote:
> > On Thu, 12 Dec 2013 09:48:06 -0500
> > Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
> >
> > > 1) git mv wait.[ch] --- > cwait.[ch]
> > > make an empty wait.h include cwait.h temporarily
> > >
> > > 2) rename all existing functions in cwait.[ch] to have an added
> > > "c" prefix (or similar)
> > >
> > > in wait.h, temporarily add a bunch of things like
> > > #define wait_xyz cwait_xyz
> > > so that things still compile and link.
> >
> > What about instead change all users of wait_*() to cwait_*().
> >
> > Then the next steps would be to skip 3 and jump right to 4)
> >
> > >
> > > 3) track down the users who really need the extra complexity
> > > and have them use cwait calls and include cwait.h
> > >
> > > 4) bring in the simple wait queue support as wait.c and wait.h
> > > and delete the defines added in step two. This will be the
> > > flag day commit.
> >
> > Not a flag day commit, as no one is using it. Then start converting all
> > users back to the wait_*() functions one at a time. If something
> > breaks, we know which one it was.
>
> I don't think its a good idea to flip the name spaces.
>
> We should rather convert the existing stuff over to cwait or whatever
> and keep the swait for the new facility. So it breaks everything which
> is trying to use wait*
>
> - out of tree code
> - students copying from ldd3
> - ...
> [ not that I care much about any of that ]
>
> in a very obvious way.
>
That still happens in my approach too, in the same step. When the
simple wait queue is added, everything will break that uses it when it
should not have. That includes, out of tree code, students copying
from ldd3, etc.
And it too will break in a very obvious way.
-- Steve
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 14:55 ` Steven Rostedt
2013-12-12 15:25 ` Thomas Gleixner
@ 2013-12-12 15:30 ` Paul Gortmaker
1 sibling, 0 replies; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 15:30 UTC (permalink / raw)
To: Steven Rostedt
Cc: Thomas Gleixner, Christoph Hellwig, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On 13-12-12 09:55 AM, Steven Rostedt wrote:
> On Thu, 12 Dec 2013 09:48:06 -0500
> Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
>> 1) git mv wait.[ch] --- > cwait.[ch]
>> make an empty wait.h include cwait.h temporarily
>>
>> 2) rename all existing functions in cwait.[ch] to have an added
>> "c" prefix (or similar)
>>
>> in wait.h, temporarily add a bunch of things like
>> #define wait_xyz cwait_xyz
>> so that things still compile and link.
>
> What about instead change all users of wait_*() to cwait_*().
This might turn out to be a big patch, but it is largely
just a mechanical sed, so I guess that is OK.
>
> Then the next steps would be to skip 3 and jump right to 4)
>
>>
>> 3) track down the users who really need the extra complexity
>> and have them use cwait calls and include cwait.h
>>
>> 4) bring in the simple wait queue support as wait.c and wait.h
>> and delete the defines added in step two. This will be the
>> flag day commit.
>
> Not a flag day commit, as no one is using it. Then start converting all
> users back to the wait_*() functions one at a time. If something
> breaks, we know which one it was.
Yep, I think that would work and would avoid the flag day problem
that I was dreading.
Paul.
--
>
> -- Steve
>
>>
>> Is that what we want to do here?
>>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
` (2 preceding siblings ...)
2013-12-12 8:03 ` Christoph Hellwig
@ 2013-12-12 11:18 ` Peter Zijlstra
2013-12-12 11:20 ` Peter Zijlstra
2013-12-12 16:17 ` Paul Gortmaker
2013-12-12 11:44 ` Peter Zijlstra
4 siblings, 2 replies; 24+ messages in thread
From: Peter Zijlstra @ 2013-12-12 11:18 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> +/*
> + * Event API
> + */
> +#define __swait_event(wq, condition) \
> +do { \
> + DEFINE_SWAITER(__wait); \
> + \
> + for (;;) { \
> + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
> + if (condition) \
> + break; \
> + schedule(); \
> + } \
> + swait_finish(&wq, &__wait); \
> +} while (0)
> +
> +#define __swait_event_interruptible(wq, condition, ret) \
> +do { \
> + DEFINE_SWAITER(__wait); \
> + \
> + for (;;) { \
> + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
> + if (condition) \
> + break; \
> + if (signal_pending(current)) { \
> + ret = -ERESTARTSYS; \
> + break; \
> + } \
> + schedule(); \
> + } \
> + swait_finish(&wq, &__wait); \
> +} while (0)
> +
> +#define __swait_event_interruptible_timeout(wq, condition, ret) \
> +do { \
> + DEFINE_SWAITER(__wait); \
> + \
> + for (;;) { \
> + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
> + if (condition) \
> + break; \
> + if (signal_pending(current)) { \
> + ret = -ERESTARTSYS; \
> + break; \
> + } \
> + ret = schedule_timeout(ret); \
> + if (!ret) \
> + break; \
> + } \
> + swait_finish(&wq, &__wait); \
> +} while (0)
Urgh, please have a look at ___wait_event() we just killed all the
pointless replication for the normal waitqueues, please don't add more
of it.
> +unsigned int
> +__swake_up_locked(struct swait_queue_head *head, unsigned int state,
> + unsigned int num)
> +{
> + struct swaiter *curr, *next;
> + int woken = 0;
> +
> + list_for_each_entry_safe(curr, next, &head->task_list, node) {
> + if (wake_up_state(curr->task, state)) {
> + __swait_dequeue(curr);
> + /*
> + * The waiting task can free the waiter as
> + * soon as curr->task = NULL is written,
> + * without taking any locks. A memory barrier
> + * is required here to prevent the following
> + * store to curr->task from getting ahead of
> + * the dequeue operation.
> + */
> + smp_wmb();
> + curr->task = NULL;
> + if (++woken == num)
> + break;
> + }
> + }
> + return woken;
> +}
> +
> +unsigned int
> +__swake_up(struct swait_queue_head *head, unsigned int state, unsigned int num)
> +{
> + unsigned long flags;
> + int woken;
> +
> + if (!swaitqueue_active(head))
> + return 0;
> +
> + raw_spin_lock_irqsave(&head->lock, flags);
> + woken = __swake_up_locked(head, state, num);
> + raw_spin_unlock_irqrestore(&head->lock, flags);
> + return woken;
> +}
> +EXPORT_SYMBOL(__swake_up);
Urgh, fail. Do not put unbounded loops in raw_spin_lock.
I think I posted a patch a while back to cure this.
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 11:18 ` Peter Zijlstra
@ 2013-12-12 11:20 ` Peter Zijlstra
2013-12-12 14:19 ` Paul Gortmaker
2013-12-12 16:17 ` Paul Gortmaker
1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2013-12-12 11:20 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On Thu, Dec 12, 2013 at 12:18:09PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> > +unsigned int
> > +__swake_up_locked(struct swait_queue_head *head, unsigned int state,
> > + unsigned int num)
> > +{
> > + struct swaiter *curr, *next;
> > + int woken = 0;
> > +
> > + list_for_each_entry_safe(curr, next, &head->task_list, node) {
> > + if (wake_up_state(curr->task, state)) {
> > + __swait_dequeue(curr);
> > + /*
> > + * The waiting task can free the waiter as
> > + * soon as curr->task = NULL is written,
> > + * without taking any locks. A memory barrier
> > + * is required here to prevent the following
> > + * store to curr->task from getting ahead of
> > + * the dequeue operation.
> > + */
> > + smp_wmb();
> > + curr->task = NULL;
> > + if (++woken == num)
> > + break;
> > + }
> > + }
> > + return woken;
> > +}
> > +
> > +unsigned int
> > +__swake_up(struct swait_queue_head *head, unsigned int state, unsigned int num)
> > +{
> > + unsigned long flags;
> > + int woken;
> > +
> > + if (!swaitqueue_active(head))
> > + return 0;
> > +
> > + raw_spin_lock_irqsave(&head->lock, flags);
> > + woken = __swake_up_locked(head, state, num);
> > + raw_spin_unlock_irqrestore(&head->lock, flags);
> > + return woken;
> > +}
> > +EXPORT_SYMBOL(__swake_up);
>
> Urgh, fail. Do not put unbounded loops in raw_spin_lock.
>
> I think I posted a patch a while back to cure this.
tada!
lkml.kernel.org/r/20131004145625.GN3081@twins.programming.kicks-ass.net
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 11:20 ` Peter Zijlstra
@ 2013-12-12 14:19 ` Paul Gortmaker
0 siblings, 0 replies; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 14:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On 13-12-12 06:20 AM, Peter Zijlstra wrote:
> On Thu, Dec 12, 2013 at 12:18:09PM +0100, Peter Zijlstra wrote:
>> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
[...]
>>> +
>>> +unsigned int
>>> +__swake_up(struct swait_queue_head *head, unsigned int state, unsigned int num)
>>> +{
>>> + unsigned long flags;
>>> + int woken;
>>> +
>>> + if (!swaitqueue_active(head))
>>> + return 0;
>>> +
>>> + raw_spin_lock_irqsave(&head->lock, flags);
>>> + woken = __swake_up_locked(head, state, num);
>>> + raw_spin_unlock_irqrestore(&head->lock, flags);
>>> + return woken;
>>> +}
>>> +EXPORT_SYMBOL(__swake_up);
>>
>> Urgh, fail. Do not put unbounded loops in raw_spin_lock.
>>
>> I think I posted a patch a while back to cure this.
>
> tada!
>
> lkml.kernel.org/r/20131004145625.GN3081@twins.programming.kicks-ass.net
Yep, I linked to that at the bottom of the 0/3 -- I was still
hoping we could find a way to somehow do that w/o passing
the flags around between functions... perhaps it isn't possible...
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 11:18 ` Peter Zijlstra
2013-12-12 11:20 ` Peter Zijlstra
@ 2013-12-12 16:17 ` Paul Gortmaker
1 sibling, 0 replies; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 16:17 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On 13-12-12 06:18 AM, Peter Zijlstra wrote:
> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
>> +/*
>> + * Event API
>> + */
>> +#define __swait_event(wq, condition) \
>> +do { \
>> + DEFINE_SWAITER(__wait); \
>> + \
>> + for (;;) { \
>> + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
>> + if (condition) \
>> + break; \
>> + schedule(); \
>> + } \
>> + swait_finish(&wq, &__wait); \
>> +} while (0)
>> +
>> +#define __swait_event_interruptible(wq, condition, ret) \
>> +do { \
>> + DEFINE_SWAITER(__wait); \
>> + \
>> + for (;;) { \
>> + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
>> + if (condition) \
>> + break; \
>> + if (signal_pending(current)) { \
>> + ret = -ERESTARTSYS; \
>> + break; \
>> + } \
>> + schedule(); \
>> + } \
>> + swait_finish(&wq, &__wait); \
>> +} while (0)
>> +
>> +#define __swait_event_interruptible_timeout(wq, condition, ret) \
>> +do { \
>> + DEFINE_SWAITER(__wait); \
>> + \
>> + for (;;) { \
>> + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
>> + if (condition) \
>> + break; \
>> + if (signal_pending(current)) { \
>> + ret = -ERESTARTSYS; \
>> + break; \
>> + } \
>> + ret = schedule_timeout(ret); \
>> + if (!ret) \
>> + break; \
>> + } \
>> + swait_finish(&wq, &__wait); \
>> +} while (0)
>
> Urgh, please have a look at ___wait_event() we just killed all the
> pointless replication for the normal waitqueues, please don't add more
> of it.
Right, I recall seeing that series go by in October ; thanks for
the reminder, I'll clean this up to match what was done in commit
41a1431b178c3b73 and its follow-on commits.
Paul.
--
>
>
>> +unsigned int
>> +__swake_up_locked(struct swait_queue_head *head, unsigned int state,
>> + unsigned int num)
>> +{
>> + struct swaiter *curr, *next;
>> + int woken = 0;
>> +
>> + list_for_each_entry_safe(curr, next, &head->task_list, node) {
>> + if (wake_up_state(curr->task, state)) {
>> + __swait_dequeue(curr);
>> + /*
>> + * The waiting task can free the waiter as
>> + * soon as curr->task = NULL is written,
>> + * without taking any locks. A memory barrier
>> + * is required here to prevent the following
>> + * store to curr->task from getting ahead of
>> + * the dequeue operation.
>> + */
>> + smp_wmb();
>> + curr->task = NULL;
>> + if (++woken == num)
>> + break;
>> + }
>> + }
>> + return woken;
>> +}
>> +
>> +unsigned int
>> +__swake_up(struct swait_queue_head *head, unsigned int state, unsigned int num)
>> +{
>> + unsigned long flags;
>> + int woken;
>> +
>> + if (!swaitqueue_active(head))
>> + return 0;
>> +
>> + raw_spin_lock_irqsave(&head->lock, flags);
>> + woken = __swake_up_locked(head, state, num);
>> + raw_spin_unlock_irqrestore(&head->lock, flags);
>> + return woken;
>> +}
>> +EXPORT_SYMBOL(__swake_up);
>
> Urgh, fail. Do not put unbounded loops in raw_spin_lock.
>
> I think I posted a patch a while back to cure this.
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
` (3 preceding siblings ...)
2013-12-12 11:18 ` Peter Zijlstra
@ 2013-12-12 11:44 ` Peter Zijlstra
2013-12-12 14:42 ` Steven Rostedt
4 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2013-12-12 11:44 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Ingo Molnar, Sebastian Andrzej Siewior,
Steven Rostedt, Paul E. McKenney, Andrew Morton, linux-kernel
On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> +/* Adds w to head->task_list. Must be called with head->lock locked. */
> +static inline void __swait_enqueue(struct swait_queue_head *head,
> + struct swaiter *w)
> +{
> + list_add(&w->node, &head->task_list);
> + /* We can't let the condition leak before the setting of head */
> + smp_mb();
> +}
> +unsigned int
> +__swake_up_locked(struct swait_queue_head *head, unsigned int state,
> + unsigned int num)
> +{
> + struct swaiter *curr, *next;
> + int woken = 0;
> +
> + list_for_each_entry_safe(curr, next, &head->task_list, node) {
> + if (wake_up_state(curr->task, state)) {
> + __swait_dequeue(curr);
> + /*
> + * The waiting task can free the waiter as
> + * soon as curr->task = NULL is written,
> + * without taking any locks. A memory barrier
> + * is required here to prevent the following
> + * store to curr->task from getting ahead of
> + * the dequeue operation.
> + */
> + smp_wmb();
> + curr->task = NULL;
> + if (++woken == num)
> + break;
> + }
> + }
> + return woken;
> +}
Are these two barriers matched or are they both unmatched and thus
probabyl wrong?
In any case the comments need updating to be more explicit.
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 11:44 ` Peter Zijlstra
@ 2013-12-12 14:42 ` Steven Rostedt
2013-12-12 16:03 ` Peter Zijlstra
0 siblings, 1 reply; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 14:42 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Paul Gortmaker, Thomas Gleixner, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013 12:44:47 +0100
Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Dec 11, 2013 at 08:06:37PM -0500, Paul Gortmaker wrote:
> > +/* Adds w to head->task_list. Must be called with head->lock locked. */
> > +static inline void __swait_enqueue(struct swait_queue_head *head,
> > + struct swaiter *w)
> > +{
> > + list_add(&w->node, &head->task_list);
> > + /* We can't let the condition leak before the setting of head */
> > + smp_mb();
> > +}
>
> > +unsigned int
> > +__swake_up_locked(struct swait_queue_head *head, unsigned int state,
> > + unsigned int num)
> > +{
> > + struct swaiter *curr, *next;
> > + int woken = 0;
> > +
> > + list_for_each_entry_safe(curr, next, &head->task_list, node) {
> > + if (wake_up_state(curr->task, state)) {
> > + __swait_dequeue(curr);
> > + /*
> > + * The waiting task can free the waiter as
> > + * soon as curr->task = NULL is written,
> > + * without taking any locks. A memory barrier
> > + * is required here to prevent the following
> > + * store to curr->task from getting ahead of
> > + * the dequeue operation.
> > + */
> > + smp_wmb();
> > + curr->task = NULL;
> > + if (++woken == num)
> > + break;
> > + }
> > + }
> > + return woken;
> > +}
>
> Are these two barriers matched or are they both unmatched and thus
> probabyl wrong?
Nope, the two are unrelated. The the smp_wmb() is to synchronize with
the swait_finish() code. When the task wakes up, it checks if w->task
is NULL, and if it is it does not grab the head->lock and does not
dequeue it, it simply exits, where the caller can then free the swaiter
structure.
Without the smp_wmb(), the curr->task can be set to NULL before we
dequeue it, and if the woken up process sees that NULL, it can jump
right to freeing the swaiter structure and cause havoc with this
__swait_dequeue().
The first smp_mb() is about the condition in:
+#define __swait_event(wq, condition) \
+do { \
+ DEFINE_SWAITER(__wait); \
+ \
+ for (;;) { \
+ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ schedule(); \
+ } \
+ swait_finish(&wq, &__wait); \
+} while (0)
without the smp_mb(), it is possible that the condition can leak into
the critical section of swait_prepare() and have the old condition seen
before the task is added to the wait list. My submission of this patch
described it in more details:
https://lkml.org/lkml/2013/8/19/275
>
> In any case the comments need updating to be more explicit.
Yeah, I can add documentation about this as well. The smp_wmb() I think
is probably documented enough, but the two smp_mb()s need a little more
explanation.
-- Steve
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 14:42 ` Steven Rostedt
@ 2013-12-12 16:03 ` Peter Zijlstra
2013-12-12 16:51 ` Steven Rostedt
0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2013-12-12 16:03 UTC (permalink / raw)
To: Steven Rostedt
Cc: Paul Gortmaker, Thomas Gleixner, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, Dec 12, 2013 at 09:42:27AM -0500, Steven Rostedt wrote:
> On Thu, 12 Dec 2013 12:44:47 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> > Are these two barriers matched or are they both unmatched and thus
> > probabyl wrong?
>
> Nope, the two are unrelated. The the smp_wmb() is to synchronize with
> the swait_finish() code. When the task wakes up, it checks if w->task
> is NULL, and if it is it does not grab the head->lock and does not
> dequeue it, it simply exits, where the caller can then free the swaiter
> structure.
>
> Without the smp_wmb(), the curr->task can be set to NULL before we
> dequeue it, and if the woken up process sees that NULL, it can jump
> right to freeing the swaiter structure and cause havoc with this
> __swait_dequeue().
And yet the swait_finish thing does not have a barrier. Unmatched
barriers are highly suspect.
> The first smp_mb() is about the condition in:
>
> +#define __swait_event(wq, condition) \
> +do { \
> + DEFINE_SWAITER(__wait); \
> + \
> + for (;;) { \
> + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
> + if (condition) \
> + break; \
> + schedule(); \
> + } \
> + swait_finish(&wq, &__wait); \
> +} while (0)
>
> without the smp_mb(), it is possible that the condition can leak into
> the critical section of swait_prepare() and have the old condition seen
> before the task is added to the wait list. My submission of this patch
> described it in more details:
>
> https://lkml.org/lkml/2013/8/19/275
still a fail, barriers should not be described in changelogs but in
comments.
Typically such a barrier comes from set_current_state(), the normal
pattern is something like:
set_current_state(TASK_UNINTERRUPTIBLE)
if (!cond)
schedule();
__set_current_state(TASK_RUNNING);
vs
cond = true;
wake_up_process(&foo);
Where set_current_state() implies the mb that separates the task-state
write from the condition read, and wake_up_process() implies enough
barrier to separate the condition write from its task state read.
And this is an explicit pairing.
The only reason you even need an explicit barrier is because you're not
using set_current_state(), which is your entire problem.
> > In any case the comments need updating to be more explicit.
>
> Yeah, I can add documentation about this as well. The smp_wmb() I think
> is probably documented enough, but the two smp_mb()s need a little more
> explanation.
No, the wmb needs to be far more explicit on why its ok (if it is indeed
ok) to not be balanced.
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 16:03 ` Peter Zijlstra
@ 2013-12-12 16:51 ` Steven Rostedt
2013-12-12 17:10 ` Steven Rostedt
0 siblings, 1 reply; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 16:51 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Paul Gortmaker, Thomas Gleixner, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013 17:03:23 +0100
Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Dec 12, 2013 at 09:42:27AM -0500, Steven Rostedt wrote:
> > On Thu, 12 Dec 2013 12:44:47 +0100
> > Peter Zijlstra <peterz@infradead.org> wrote:
> > > Are these two barriers matched or are they both unmatched and thus
> > > probabyl wrong?
> >
> > Nope, the two are unrelated. The the smp_wmb() is to synchronize with
> > the swait_finish() code. When the task wakes up, it checks if w->task
> > is NULL, and if it is it does not grab the head->lock and does not
> > dequeue it, it simply exits, where the caller can then free the swaiter
> > structure.
> >
> > Without the smp_wmb(), the curr->task can be set to NULL before we
> > dequeue it, and if the woken up process sees that NULL, it can jump
> > right to freeing the swaiter structure and cause havoc with this
> > __swait_dequeue().
>
> And yet the swait_finish thing does not have a barrier. Unmatched
> barriers are highly suspect.
Well if it sees the task as NULL then it has nothing to do. The
smp_wmb() on the other end guarantees that. If it sees it not NULL it
then goes into the slow path and the spinlocks synchronize everything
else. That is, we don't care if it sees it as not NULL when it was NULL,
because then the slow path just does duplicate work (under a lock)
(removes itself from the queue with list_del_init() which is fine to
call twice).
Add a comment there discussing this won't hurt.
>
> > The first smp_mb() is about the condition in:
> >
> > +#define __swait_event(wq, condition) \
> > +do { \
> > + DEFINE_SWAITER(__wait); \
> > + \
> > + for (;;) { \
> > + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
> > + if (condition) \
> > + break; \
> > + schedule(); \
> > + } \
> > + swait_finish(&wq, &__wait); \
> > +} while (0)
> >
> > without the smp_mb(), it is possible that the condition can leak into
> > the critical section of swait_prepare() and have the old condition seen
> > before the task is added to the wait list. My submission of this patch
> > described it in more details:
> >
> > https://lkml.org/lkml/2013/8/19/275
>
> still a fail, barriers should not be described in changelogs but in
> comments.
I agree.
>
> Typically such a barrier comes from set_current_state(), the normal
> pattern is something like:
>
> set_current_state(TASK_UNINTERRUPTIBLE)
> if (!cond)
> schedule();
> __set_current_state(TASK_RUNNING);
>
> vs
>
> cond = true;
> wake_up_process(&foo);
Hmm, that __set_current_state() in swait_prepare() does indeed seem
buggy. I'm surprised that I didn't catch that, as I'm usually a
stickler with set_current_state() (and I'm very paranoid when it comes
to using __set_current_state()).
I'll have to dig deeper to see why I didn't change that.
Thanks,
-- Steve
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 16:51 ` Steven Rostedt
@ 2013-12-12 17:10 ` Steven Rostedt
2013-12-12 17:17 ` Peter Zijlstra
0 siblings, 1 reply; 24+ messages in thread
From: Steven Rostedt @ 2013-12-12 17:10 UTC (permalink / raw)
To: Steven Rostedt
Cc: Peter Zijlstra, Paul Gortmaker, Thomas Gleixner, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, 12 Dec 2013 11:51:37 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > Typically such a barrier comes from set_current_state(), the normal
> > pattern is something like:
> >
> > set_current_state(TASK_UNINTERRUPTIBLE)
> > if (!cond)
> > schedule();
> > __set_current_state(TASK_RUNNING);
> >
> > vs
> >
> > cond = true;
> > wake_up_process(&foo);
>
> Hmm, that __set_current_state() in swait_prepare() does indeed seem
> buggy. I'm surprised that I didn't catch that, as I'm usually a
> stickler with set_current_state() (and I'm very paranoid when it comes
> to using __set_current_state()).
>
> I'll have to dig deeper to see why I didn't change that.
OK, looking at my irc logs discussing this with Paul McKenney, this was
an optimization:
<rostedt> as set_current_state() may be too big of a heavy weight
<rostedt> It's usually to synchronize between wake ups and state stet
<rostedt> set
<rostedt> but both the set state and the wakeup is within the same spin
lock
So if we break up your code above, we have:
raw_spin_lock_irqsave(&head->lock, flags);
w->task = current;
if (list_empty(&w->node)) {
list_add(&w->node, &head->list);
smp_mb();
}
__set_current_state(state);
raw_spin_unlock_irqrestore(&head->lock, flags);
if (!cond)
schedule();
vs
cond = true;
raw_spin_lock_irqsave(&head->lock, flags);
woken = __swait_wake_locked(head, state, num);
raw_spin_unlock_irqrestore(&head->lock, flags);
That is, the change of state with respect to the list is synchronized
by the head->lock. We only need to synchronize seeing the condition
with the adding to the list. Once we are on the list, we get woken up
regardless.
But I think this is a micro optimization, and probably still buggy, as
I can imagine a race if we are already on the list, and we don't call
the memory barrier and miss seeing the condition after being woken up
and resetting ourselves back to a sleeping state.
Paul,
You can remove the smp_mb() from __swait_enqueue() and instead replace
the __set_current_state() to set_current_state() in
swait_prepare_locked().
Thanks!
-- Steve
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation
2013-12-12 17:10 ` Steven Rostedt
@ 2013-12-12 17:17 ` Peter Zijlstra
0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2013-12-12 17:17 UTC (permalink / raw)
To: Steven Rostedt
Cc: Paul Gortmaker, Thomas Gleixner, Ingo Molnar,
Sebastian Andrzej Siewior, Paul E. McKenney, Andrew Morton,
linux-kernel
On Thu, Dec 12, 2013 at 12:10:15PM -0500, Steven Rostedt wrote:
> So if we break up your code above, we have:
>
> raw_spin_lock_irqsave(&head->lock, flags);
> w->task = current;
> if (list_empty(&w->node)) {
> list_add(&w->node, &head->list);
> smp_mb();
> }
> __set_current_state(state);
> raw_spin_unlock_irqrestore(&head->lock, flags);
>
> if (!cond)
> schedule();
>
the unlock is semi-permeable and would allow the cond test to cross over
and even be satisfied before the state write.
>
> vs
>
> cond = true;
>
> raw_spin_lock_irqsave(&head->lock, flags);
> woken = __swait_wake_locked(head, state, num);
> raw_spin_unlock_irqrestore(&head->lock, flags);
Same here, the lock is semi-permeable and would allow the cond store to
leak down.
In the first case we really need the implied mb of set_current_state(),
the the second case the actual wakeup would still provide the required
barrier.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 2/3] sched/core: convert completions to use simple wait queues
2013-12-12 1:06 [PATCH 0/3] Introduce simple wait queues Paul Gortmaker
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
@ 2013-12-12 1:06 ` Paul Gortmaker
2013-12-12 1:06 ` [PATCH 3/3] rcu: use simple wait queues where possible in rcutree Paul Gortmaker
2 siblings, 0 replies; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 1:06 UTC (permalink / raw)
To: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Paul E. McKenney
Cc: Andrew Morton, linux-kernel, Paul Gortmaker
From: Thomas Gleixner <tglx@linutronix.de>
Completions have no long lasting callbacks and therefore do not need
the complex waitqueue variant. Use simple waitqueues which reduces
the contention on the waitqueue lock.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[PG: carry forward from v3.10-rt, drop RT specific chunks, align with
names that were chosen to match normal waitqueue support.]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
include/linux/completion.h | 8 ++++----
include/linux/uprobes.h | 1 +
kernel/sched/completion.c | 34 +++++++++++++++++-----------------
3 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 5d5aaae..d8e6db9 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -8,7 +8,7 @@
* See kernel/sched/completion.c for details.
*/
-#include <linux/wait.h>
+#include <linux/swait.h>
/*
* struct completion - structure used to maintain state for a "completion"
@@ -24,11 +24,11 @@
*/
struct completion {
unsigned int done;
- wait_queue_head_t wait;
+ swait_queue_head_t wait;
};
#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
#define COMPLETION_INITIALIZER_ONSTACK(work) \
({ init_completion(&work); work; })
@@ -73,7 +73,7 @@ struct completion {
static inline void init_completion(struct completion *x)
{
x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swaitqueue_head(&x->wait);
}
/**
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 319eae7..e3a167f 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -26,6 +26,7 @@
#include <linux/errno.h>
#include <linux/rbtree.h>
+#include <linux/wait.h>
struct vm_area_struct;
struct mm_struct;
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index a63f4dc..bcdd609 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -30,10 +30,10 @@ void complete(struct completion *x)
{
unsigned long flags;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 1);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
@@ -50,10 +50,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 0);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
@@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_SWAITER(wait);
- __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ swait_prepare_locked(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ swait_finish_locked(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -89,9 +89,9 @@ __wait_for_common(struct completion *x,
{
might_sleep();
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
return timeout;
}
@@ -267,12 +267,12 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -290,10 +290,10 @@ bool completion_done(struct completion *x)
unsigned long flags;
int ret = 1;
- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(completion_done);
--
1.8.5.1
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH 3/3] rcu: use simple wait queues where possible in rcutree
2013-12-12 1:06 [PATCH 0/3] Introduce simple wait queues Paul Gortmaker
2013-12-12 1:06 ` [PATCH 1/3] wait-simple: Introduce the simple waitqueue implementation Paul Gortmaker
2013-12-12 1:06 ` [PATCH 2/3] sched/core: convert completions to use simple wait queues Paul Gortmaker
@ 2013-12-12 1:06 ` Paul Gortmaker
2013-12-12 3:42 ` Paul E. McKenney
2 siblings, 1 reply; 24+ messages in thread
From: Paul Gortmaker @ 2013-12-12 1:06 UTC (permalink / raw)
To: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Paul E. McKenney
Cc: Andrew Morton, linux-kernel, Paul Gortmaker
From: Thomas Gleixner <tglx@linutronix.de>
As of commit dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 ("rcu: Introduce
proper blocking to no-CBs kthreads GP waits") the rcu subsystem started
making use of wait queues.
Here we convert all additions of rcu wait queues to use simple wait queues,
since they don't need the extra overhead of the full wait queue features.
Originally this was done for RT kernels, since we would get things like...
BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 1, pid: 8, name: rcu_preempt
Pid: 8, comm: rcu_preempt Not tainted
Call Trace:
[<ffffffff8106c8d0>] __might_sleep+0xd0/0xf0
[<ffffffff817d77b4>] rt_spin_lock+0x24/0x50
[<ffffffff8106fcf6>] __wake_up+0x36/0x70
[<ffffffff810c4542>] rcu_gp_kthread+0x4d2/0x680
[<ffffffff8105f910>] ? __init_waitqueue_head+0x50/0x50
[<ffffffff810c4070>] ? rcu_gp_fqs+0x80/0x80
[<ffffffff8105eabb>] kthread+0xdb/0xe0
[<ffffffff8106b912>] ? finish_task_switch+0x52/0x100
[<ffffffff817e0754>] kernel_thread_helper+0x4/0x10
[<ffffffff8105e9e0>] ? __init_kthread_worker+0x60/0x60
[<ffffffff817e0750>] ? gs_change+0xb/0xb
...and hence simple wait queues were deployed on RT out of necessity
(as simple wait uses a raw lock), but mainline might as well take
advantage of the more streamline support as well.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
[PG: adapt from multiple v3.10-rt patches and add a commit log.]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
kernel/rcu/tree.c | 16 ++++++++--------
kernel/rcu/tree.h | 7 ++++---
kernel/rcu/tree_plugin.h | 14 +++++++-------
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd08198..b35babb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1550,9 +1550,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("reqwait"));
- wait_event_interruptible(rsp->gp_wq,
- ACCESS_ONCE(rsp->gp_flags) &
- RCU_GP_FLAG_INIT);
+ swait_event_interruptible(rsp->gp_wq,
+ ACCESS_ONCE(rsp->gp_flags) &
+ RCU_GP_FLAG_INIT);
if (rcu_gp_init(rsp))
break;
cond_resched();
@@ -1576,7 +1576,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("fqswait"));
- ret = wait_event_interruptible_timeout(rsp->gp_wq,
+ ret = swait_event_interruptible_timeout(rsp->gp_wq,
((gf = ACCESS_ONCE(rsp->gp_flags)) &
RCU_GP_FLAG_FQS) ||
(!ACCESS_ONCE(rnp->qsmask) &&
@@ -1625,7 +1625,7 @@ static void rsp_wakeup(struct irq_work *work)
struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
/* Wake up rcu_gp_kthread() to start the grace period. */
- wake_up(&rsp->gp_wq);
+ swake_up(&rsp->gp_wq);
}
/*
@@ -1701,7 +1701,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
/*
@@ -2271,7 +2271,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
}
rsp->gp_flags |= RCU_GP_FLAG_FQS;
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
/*
@@ -3304,7 +3304,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}
rsp->rda = rda;
- init_waitqueue_head(&rsp->gp_wq);
+ init_swaitqueue_head(&rsp->gp_wq);
init_irq_work(&rsp->wakeup_work, rsp_wakeup);
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 52be957..01476e1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -28,6 +28,7 @@
#include <linux/cpumask.h>
#include <linux/seqlock.h>
#include <linux/irq_work.h>
+#include <linux/swait.h>
/*
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -200,7 +201,7 @@ struct rcu_node {
/* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
- wait_queue_head_t nocb_gp_wq[2];
+ swait_queue_head_t nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
@@ -333,7 +334,7 @@ struct rcu_data {
atomic_long_t nocb_q_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
- wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
+ swait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -403,7 +404,7 @@ struct rcu_state {
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
- wait_queue_head_t gp_wq; /* Where GP task waits. */
+ swait_queue_head_t gp_wq; /* Where GP task waits. */
int gp_flags; /* Commands for GP task. */
/* End of fields guarded by root rcu_node's lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 08a7652..b565ebd 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2060,7 +2060,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
*/
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+ swake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
}
/*
@@ -2078,8 +2078,8 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
static void rcu_init_one_nocb(struct rcu_node *rnp)
{
- init_waitqueue_head(&rnp->nocb_gp_wq[0]);
- init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+ init_swaitqueue_head(&rnp->nocb_gp_wq[0]);
+ init_swaitqueue_head(&rnp->nocb_gp_wq[1]);
}
/* Is the specified CPU a no-CPUs CPU? */
@@ -2122,7 +2122,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
}
len = atomic_long_read(&rdp->nocb_q_count);
if (old_rhpp == &rdp->nocb_head) {
- wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+ swake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
rdp->qlen_last_fqs_check = 0;
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
@@ -2218,7 +2218,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
*/
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
- wait_event_interruptible(
+ swait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
if (likely(d))
@@ -2249,7 +2249,7 @@ static int rcu_nocb_kthread(void *arg)
if (!rcu_nocb_poll) {
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("Sleep"));
- wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+ swait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
} else if (firsttime) {
firsttime = 0;
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -2314,7 +2314,7 @@ static int rcu_nocb_kthread(void *arg)
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
rdp->nocb_tail = &rdp->nocb_head;
- init_waitqueue_head(&rdp->nocb_wq);
+ init_swaitqueue_head(&rdp->nocb_wq);
}
/* Create a kthread for each RCU flavor for each no-CBs CPU. */
--
1.8.5.1
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH 3/3] rcu: use simple wait queues where possible in rcutree
2013-12-12 1:06 ` [PATCH 3/3] rcu: use simple wait queues where possible in rcutree Paul Gortmaker
@ 2013-12-12 3:42 ` Paul E. McKenney
0 siblings, 0 replies; 24+ messages in thread
From: Paul E. McKenney @ 2013-12-12 3:42 UTC (permalink / raw)
To: Paul Gortmaker
Cc: Thomas Gleixner, Peter Zijlstra, Ingo Molnar,
Sebastian Andrzej Siewior, Steven Rostedt, Andrew Morton,
linux-kernel
On Wed, Dec 11, 2013 at 08:06:39PM -0500, Paul Gortmaker wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
>
> As of commit dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 ("rcu: Introduce
> proper blocking to no-CBs kthreads GP waits") the rcu subsystem started
> making use of wait queues.
>
> Here we convert all additions of rcu wait queues to use simple wait queues,
> since they don't need the extra overhead of the full wait queue features.
>
> Originally this was done for RT kernels, since we would get things like...
>
> BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
> in_atomic(): 1, irqs_disabled(): 1, pid: 8, name: rcu_preempt
> Pid: 8, comm: rcu_preempt Not tainted
> Call Trace:
> [<ffffffff8106c8d0>] __might_sleep+0xd0/0xf0
> [<ffffffff817d77b4>] rt_spin_lock+0x24/0x50
> [<ffffffff8106fcf6>] __wake_up+0x36/0x70
> [<ffffffff810c4542>] rcu_gp_kthread+0x4d2/0x680
> [<ffffffff8105f910>] ? __init_waitqueue_head+0x50/0x50
> [<ffffffff810c4070>] ? rcu_gp_fqs+0x80/0x80
> [<ffffffff8105eabb>] kthread+0xdb/0xe0
> [<ffffffff8106b912>] ? finish_task_switch+0x52/0x100
> [<ffffffff817e0754>] kernel_thread_helper+0x4/0x10
> [<ffffffff8105e9e0>] ? __init_kthread_worker+0x60/0x60
> [<ffffffff817e0750>] ? gs_change+0xb/0xb
>
> ...and hence simple wait queues were deployed on RT out of necessity
> (as simple wait uses a raw lock), but mainline might as well take
> advantage of the more streamline support as well.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> [PG: adapt from multiple v3.10-rt patches and add a commit log.]
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
You got the swake_up_all() this time, so:
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
;-)
> ---
> kernel/rcu/tree.c | 16 ++++++++--------
> kernel/rcu/tree.h | 7 ++++---
> kernel/rcu/tree_plugin.h | 14 +++++++-------
> 3 files changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index dd08198..b35babb 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1550,9 +1550,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
> trace_rcu_grace_period(rsp->name,
> ACCESS_ONCE(rsp->gpnum),
> TPS("reqwait"));
> - wait_event_interruptible(rsp->gp_wq,
> - ACCESS_ONCE(rsp->gp_flags) &
> - RCU_GP_FLAG_INIT);
> + swait_event_interruptible(rsp->gp_wq,
> + ACCESS_ONCE(rsp->gp_flags) &
> + RCU_GP_FLAG_INIT);
> if (rcu_gp_init(rsp))
> break;
> cond_resched();
> @@ -1576,7 +1576,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
> trace_rcu_grace_period(rsp->name,
> ACCESS_ONCE(rsp->gpnum),
> TPS("fqswait"));
> - ret = wait_event_interruptible_timeout(rsp->gp_wq,
> + ret = swait_event_interruptible_timeout(rsp->gp_wq,
> ((gf = ACCESS_ONCE(rsp->gp_flags)) &
> RCU_GP_FLAG_FQS) ||
> (!ACCESS_ONCE(rnp->qsmask) &&
> @@ -1625,7 +1625,7 @@ static void rsp_wakeup(struct irq_work *work)
> struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
>
> /* Wake up rcu_gp_kthread() to start the grace period. */
> - wake_up(&rsp->gp_wq);
> + swake_up(&rsp->gp_wq);
> }
>
> /*
> @@ -1701,7 +1701,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
> {
> WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
> raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
> - wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
> + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
> }
>
> /*
> @@ -2271,7 +2271,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
> }
> rsp->gp_flags |= RCU_GP_FLAG_FQS;
> raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
> - wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
> + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
> }
>
> /*
> @@ -3304,7 +3304,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
> }
>
> rsp->rda = rda;
> - init_waitqueue_head(&rsp->gp_wq);
> + init_swaitqueue_head(&rsp->gp_wq);
> init_irq_work(&rsp->wakeup_work, rsp_wakeup);
> rnp = rsp->level[rcu_num_lvls - 1];
> for_each_possible_cpu(i) {
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 52be957..01476e1 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -28,6 +28,7 @@
> #include <linux/cpumask.h>
> #include <linux/seqlock.h>
> #include <linux/irq_work.h>
> +#include <linux/swait.h>
>
> /*
> * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
> @@ -200,7 +201,7 @@ struct rcu_node {
> /* This can happen due to race conditions. */
> #endif /* #ifdef CONFIG_RCU_BOOST */
> #ifdef CONFIG_RCU_NOCB_CPU
> - wait_queue_head_t nocb_gp_wq[2];
> + swait_queue_head_t nocb_gp_wq[2];
> /* Place for rcu_nocb_kthread() to wait GP. */
> #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
> int need_future_gp[2];
> @@ -333,7 +334,7 @@ struct rcu_data {
> atomic_long_t nocb_q_count_lazy; /* (approximate). */
> int nocb_p_count; /* # CBs being invoked by kthread */
> int nocb_p_count_lazy; /* (approximate). */
> - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
> + swait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
> struct task_struct *nocb_kthread;
> #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
>
> @@ -403,7 +404,7 @@ struct rcu_state {
> unsigned long gpnum; /* Current gp number. */
> unsigned long completed; /* # of last completed gp. */
> struct task_struct *gp_kthread; /* Task for grace periods. */
> - wait_queue_head_t gp_wq; /* Where GP task waits. */
> + swait_queue_head_t gp_wq; /* Where GP task waits. */
> int gp_flags; /* Commands for GP task. */
>
> /* End of fields guarded by root rcu_node's lock. */
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 08a7652..b565ebd 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -2060,7 +2060,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
> */
> static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
> {
> - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
> + swake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
> }
>
> /*
> @@ -2078,8 +2078,8 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
>
> static void rcu_init_one_nocb(struct rcu_node *rnp)
> {
> - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
> - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
> + init_swaitqueue_head(&rnp->nocb_gp_wq[0]);
> + init_swaitqueue_head(&rnp->nocb_gp_wq[1]);
> }
>
> /* Is the specified CPU a no-CPUs CPU? */
> @@ -2122,7 +2122,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
> }
> len = atomic_long_read(&rdp->nocb_q_count);
> if (old_rhpp == &rdp->nocb_head) {
> - wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
> + swake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
> rdp->qlen_last_fqs_check = 0;
> trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
> } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> @@ -2218,7 +2218,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
> */
> trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
> for (;;) {
> - wait_event_interruptible(
> + swait_event_interruptible(
> rnp->nocb_gp_wq[c & 0x1],
> (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
> if (likely(d))
> @@ -2249,7 +2249,7 @@ static int rcu_nocb_kthread(void *arg)
> if (!rcu_nocb_poll) {
> trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
> TPS("Sleep"));
> - wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
> + swait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
> } else if (firsttime) {
> firsttime = 0;
> trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
> @@ -2314,7 +2314,7 @@ static int rcu_nocb_kthread(void *arg)
> static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
> {
> rdp->nocb_tail = &rdp->nocb_head;
> - init_waitqueue_head(&rdp->nocb_wq);
> + init_swaitqueue_head(&rdp->nocb_wq);
> }
>
> /* Create a kthread for each RCU flavor for each no-CBs CPU. */
> --
> 1.8.5.1
>
^ permalink raw reply [flat|nested] 24+ messages in thread