All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Steven Rostedt <rostedt@goodmis.org>
Subject: Re: [PATCH 3/7] wait.[ch]: Introduce the simple waitqueue (swait) implementation
Date: Wed, 14 Jan 2015 11:38:34 +0100	[thread overview]
Message-ID: <20150114103834.GN23965@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <1413591782-23453-4-git-send-email-paul.gortmaker@windriver.com>



So I had a look at this yesterday and came up with the below --
completely untested etc.

Now in order to compile test I meant to convert the completion code and
ran head first into complete_all; it uses spin_lock_irqsave() which
means it can be used from IRQ context. Now if you look at
__swake_up_all() you'll find a comment on how we cannot have this.

Now I can't remember how important that all was for RT but I figured I'd
post it and let other people stare at it for a bit.

---
 include/linux/swait.h |  181 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/swait.c  |  162 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 343 insertions(+)

--- /dev/null
+++ b/include/linux/swait.h
@@ -0,0 +1,181 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <asm/current.h>
+
+/*
+ * Simple wait queues
+ *
+ * While these are very similar to the other/complex wait queues (wait.h) the
+ * most important difference is that the simple waitqueue allows for
+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
+ *
+ * In order to make this so, we had to drop a fair number of features of the
+ * other waitqueue code; notably:
+ *
+ *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue
+ *
+ *  - the exclusive mode; because this requires preserving the list order
+ *    and this is hard, see __swake_up_locked().
+ *
+ *  - custom wake functions; because you cannot give any guarantees about
+ *    random code.
+ *
+ * As a side effect of this; the data structures are slimmer.
+ *
+ * One would recommend using this wait queue where possible.
+ */
+
+struct task_struct;
+
+/*
+ * Head of a simple wait queue: a raw spinlock plus the list of waiters.
+ */
+struct swait_queue_head {
+	/*
+	 * Protects task_list; taken with IRQs disabled by prepare_to_swait(),
+	 * finish_swait(), __swake_up() and __swake_up_all().
+	 */
+	raw_spinlock_t		lock;
+#ifdef CONFIG_SWAIT_DEBUG
+	/* sleep state recorded by the first waiter; 0 means none recorded yet */
+	unsigned int		state;
+#endif
+	/* struct swait_queue entries, linked through swait_queue::task_list */
+	struct list_head	task_list;
+};
+
+/* One waiter; linked onto swait_queue_head::task_list while it waits. */
+struct swait_queue {
+	/* task handed to wake_up_state() by the wake side */
+	struct task_struct	*task;
+	/* self-linked (empty) whenever the entry is not queued */
+	struct list_head	task_list;
+};
+
+/* Static initializer for a struct swait_queue: task is current, entry not queued. */
+#define __SWAITQUEUE_INITIALIZER(name) {				\
+	.task		= current,					\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+/* Define a struct swait_queue called @name, initialized for the current task. */
+#define DECLARE_SWAITQUEUE(name)					\
+	struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
+
+/*
+ * swait_queue_head::state only exists under CONFIG_SWAIT_DEBUG, so the
+ * designated initializer for it must expand to nothing otherwise.
+ */
+#ifdef CONFIG_SWAIT_DEBUG
+# define __SWAIT_QUEUE_HEAD_DEBUG_INIT()				\
+	.state = 0,
+#else
+# define __SWAIT_QUEUE_HEAD_DEBUG_INIT()
+#endif
+
+/* Static initializer for a struct swait_queue_head called @name. */
+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {				\
+	.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+	__SWAIT_QUEUE_HEAD_DEBUG_INIT()					\
+}
+
+/* Define and statically initialize a struct swait_queue_head called @name. */
+#define DECLARE_SWAIT_QUEUE_HEAD(name)					\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+/*
+ * Runtime initialization of a queue head.  The macro wrapper gives every
+ * call site its own static lock_class_key and passes the stringified
+ * argument as the name; __init_swait_queue_head() hands both to lockdep.
+ */
+extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+				    struct lock_class_key *key);
+
+#define init_swait_queue_head(q)				\
+	do {							\
+		static struct lock_class_key __key;		\
+		__init_swait_queue_head((q), #q, &__key);	\
+	} while (0)
+
+/*
+ * On-stack queue heads.  With CONFIG_LOCKDEP the head is initialized at
+ * runtime through init_swait_queue_head(), so the lock gets a class key
+ * and name; without it the plain static initializer is used.
+ */
+#ifdef CONFIG_LOCKDEP
+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)			\
+	({ init_swait_queue_head(&name); name; })
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	DECLARE_SWAIT_QUEUE_HEAD(name)
+#endif
+
+/*
+ * Return non-zero if at least one waiter is queued on @q.  The list is
+ * inspected without taking q->lock.
+ */
+static inline int swait_active(struct swait_queue_head *q)
+{
+	return !list_empty(&q->task_list);
+}
+
+/*
+ * Wake-side entry points, implemented in kernel/sched/swait.c.  @mode is
+ * the state argument passed on to wake_up_state(); the swake_up*()
+ * wrappers below always pass TASK_NORMAL.
+ */
+extern void __swake_up(struct swait_queue_head *q, unsigned int mode);
+extern void __swake_up_all(struct swait_queue_head *q, unsigned int mode);
+extern void __swake_up_locked(struct swait_queue_head *q, unsigned int mode);
+
+#define swake_up(x)			__swake_up(x, TASK_NORMAL)
+#define swake_up_all(x)			__swake_up_all(x, TASK_NORMAL)
+#define swake_up_locked(x)		__swake_up_locked((x), TASK_NORMAL)
+
+/*
+ * Wait-side entry points, implemented in kernel/sched/swait.c.  The
+ * __-prefixed variants take no lock themselves; prepare_to_swait() and
+ * finish_swait() take q->lock.
+ */
+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
+
+extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+
+/*
+ * Core wait loop; as per ___wait_event() but for swait, therefore
+ * "exclusive == 0".
+ *
+ * @wq:		the struct swait_queue_head to wait on (its address is taken here)
+ * @condition:	expression re-evaluated after every prepare_to_swait_event()
+ * @state:	task state to sleep in
+ * @ret:	initial value of the result
+ * @cmd:	statement that does the actual sleeping, e.g. schedule()
+ *
+ * Evaluates to __ret, which starts as @ret and may be updated by @cmd.
+ * The loop ends when @condition is true, or when
+ * ___wait_is_interruptible(@state) holds and prepare_to_swait_event()
+ * returned non-zero; in the latter case that return value is the result.
+ */
+#define ___swait_event(wq, condition, state, ret, cmd)			\
+({									\
+	struct swait_queue __wait;					\
+	long __ret = ret;						\
+									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	for (;;) {							\
+		long __int = prepare_to_swait_event(&wq, &__wait, state);\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_swait(&wq, &__wait);					\
+	__ret;								\
+})
+
+/* Slow path of swait_event(): uninterruptible wait, result discarded. */
+#define __swait_event(wq, condition)					\
+	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
+			    schedule())
+
+/*
+ * Sleep in TASK_UNINTERRUPTIBLE until @condition is true.  @condition is
+ * checked once up front, so an already-true condition never touches the
+ * queue.
+ */
+#define swait_event(wq, condition)					\
+do {									\
+	if (condition)							\
+		break;							\
+	__swait_event(wq, condition);					\
+} while (0)
+
+/* Slow path of swait_event_timeout(); each sleep is a schedule_timeout(). */
+#define __swait_event_timeout(wq, condition, timeout)			\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+/*
+ * Like swait_event(), but sleeps through schedule_timeout().  The result
+ * is __ret, which starts at @timeout and is reassigned from every
+ * schedule_timeout() call.
+ *
+ * NOTE(review): the exact result encoding depends on
+ * ___wait_cond_timeout(), which is not defined in this header -- confirm
+ * against wait_event_timeout() in wait.h.
+ */
+#define swait_event_timeout(wq, condition, timeout)			\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_timeout(wq, condition, timeout);	\
+	__ret;								\
+})
+
+/* Slow path of swait_event_interruptible(). */
+#define __swait_event_interruptible(wq, condition)			\
+	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
+		      schedule())
+
+/*
+ * Sleep in TASK_INTERRUPTIBLE until @condition is true.  Evaluates to 0
+ * when the condition became true, or to the non-zero value returned by
+ * prepare_to_swait_event() (-ERESTARTSYS when a signal is pending).
+ */
+#define swait_event_interruptible(wq, condition)			\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__ret = __swait_event_interruptible(wq, condition);	\
+	__ret;								\
+})
+
+/* Slow path of swait_event_interruptible_timeout(). */
+#define __swait_event_interruptible_timeout(wq, condition, timeout)	\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+/*
+ * Interruptible wait with a timeout.  The result is __ret, which starts
+ * at @timeout, is reassigned from every schedule_timeout() call, and is
+ * replaced by the non-zero return of prepare_to_swait_event()
+ * (-ERESTARTSYS) when a signal is pending.
+ *
+ * NOTE(review): the timeout-related result encoding depends on
+ * ___wait_cond_timeout(), which is not defined in this header -- confirm
+ * against wait_event_interruptible_timeout() in wait.h.
+ */
+#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_interruptible_timeout(wq,		\
+						condition, timeout);	\
+	__ret;								\
+})
+
+#endif /* _LINUX_SWAIT_H */
--- /dev/null
+++ b/kernel/sched/swait.c
@@ -0,0 +1,162 @@
+
+#include <linux/swait.h>
+
+/*
+ * Initialize @q at runtime: the raw spinlock (registered with lockdep
+ * under @key and @name), an empty waiter list and, under
+ * CONFIG_SWAIT_DEBUG, no recorded sleep state.  Called by the
+ * init_swait_queue_head() macro.
+ */
+void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+			     struct lock_class_key *key)
+{
+	raw_spin_lock_init(&q->lock);
+	lockdep_set_class_and_name(&q->lock, key, name);
+	INIT_LIST_HEAD(&q->task_list);
+#ifdef CONFIG_SWAIT_DEBUG
+	q->state = 0;
+#endif
+}
+EXPORT_SYMBOL(__init_swait_queue_head);
+
+#ifdef CONFIG_SWAIT_DEBUG
+/*
+ * Ensure we do not mix and match INTERRUPTIBLE and UNINTERRUPTIBLE sleeps.
+ * This guarantees wakeups are always valid and we need not go look for
+ * wakeup targets; this ensures __swake_up() is O(1).
+ *
+ * NOTE(review): once q->state has been recorded, nothing in this file
+ * resets it apart from __init_swait_queue_head().
+ */
+
+/* Wake side: warn once if @state does not overlap the recorded sleep state. */
+static inline void __swait_wakeup_debug(struct swait_queue_head *q, unsigned int state)
+{
+	/* no waiter has recorded a state yet, nothing to compare against */
+	if (q->state == 0)
+		return;
+
+	WARN_ON_ONCE(!(q->state & state));
+}
+
+/* Wait side: record the first waiter's @state; warn once on any mismatch. */
+static inline void __swait_wait_debug(struct swait_queue_head *q, unsigned int state)
+{
+	if (q->state == 0)
+		q->state = state;
+
+	WARN_ON_ONCE(q->state != state);
+}
+#else
+/* !CONFIG_SWAIT_DEBUG: both checks compile away. */
+static inline void __swait_wakeup_debug(struct swait_queue_head *q, unsigned int state)
+{
+}
+static inline void __swait_wait_debug(struct swait_queue_head *q, unsigned int state)
+{
+}
+#endif
+
+/*
+ * Wake at most one waiter: the entry at the head of q->task_list.  Takes
+ * no lock itself; __swake_up() calls it with q->lock held.
+ *
+ * The wake_up_state() return value is ignored on purpose.  Should it ever
+ * return 0, the previously waiting task is already running and therefore
+ * will observe the condition being true (or already has).
+ */
+void __swake_up_locked(struct swait_queue_head *q, unsigned int state)
+{
+	struct swait_queue *waiter;
+
+	__swait_wakeup_debug(q, state);
+
+	/* nobody queued, nothing to wake */
+	if (list_empty(&q->task_list))
+		return;
+
+	waiter = list_first_entry(&q->task_list, typeof(*waiter), task_list);
+	wake_up_state(waiter->task, state);
+	list_del_init(&waiter->task_list);
+}
+EXPORT_SYMBOL(__swake_up_locked);
+
+/*
+ * Wake one waiter on @q.  q->lock is taken with irqsave/irqrestore, so
+ * the caller's IRQ state is preserved.
+ *
+ * NOTE(review): the swait_active() test runs without q->lock and without
+ * an explicit barrier; confirm that callers order their condition update
+ * against this check.
+ */
+void __swake_up(struct swait_queue_head *q, unsigned int state)
+{
+	unsigned long flags;
+
+	__swait_wakeup_debug(q, state);
+
+	/* unlocked fast path: skip the lock when no waiter is queued */
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	__swake_up_locked(q, state);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(__swake_up);
+
+/*
+ * Wake every waiter that is queued on @q when the lock is first taken.
+ *
+ * Does not allow usage from IRQ disabled, since we must be able to
+ * release IRQs to guarantee bounded hold time.
+ *
+ * The waiters are moved onto a private list first; q->lock is then
+ * dropped and retaken between individual wakeups.
+ */
+void __swake_up_all(struct swait_queue_head *q, unsigned int state)
+{
+	struct swait_queue *curr;
+	LIST_HEAD(tmp);
+
+	__swait_wakeup_debug(q, state);
+
+	/* unlocked fast path: skip the lock when no waiter is queued */
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irq(&q->lock);
+	list_splice_init(&q->task_list, &tmp);
+	while (!list_empty(&tmp)) {
+		/* list_first_entry() needs the struct type, not the pointer type */
+		curr = list_first_entry(&tmp, typeof(*curr), task_list);
+
+		wake_up_state(curr->task, state);
+		list_del_init(&curr->task_list);
+
+		if (list_empty(&tmp))
+			break;
+
+		/* open a window with the lock released and IRQs enabled */
+		raw_spin_unlock_irq(&q->lock);
+		raw_spin_lock_irq(&q->lock);
+	}
+	raw_spin_unlock_irq(&q->lock);
+}
+EXPORT_SYMBOL(__swake_up_all);
+
+/*
+ * Record the current task in @wait and queue @wait on @q unless it is
+ * already queued.  Takes no lock itself; prepare_to_swait() calls it with
+ * q->lock held.
+ */
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	wait->task = current;
+	/* struct swait_queue links through ->task_list; it has no ->node */
+	if (list_empty(&wait->task_list))
+		list_add(&wait->task_list, &q->task_list);
+}
+
+/*
+ * Queue @wait on @q and put the current task into @state, both under
+ * q->lock.  Does not sleep itself; ___swait_event() re-checks its
+ * condition after this call and only then runs the sleeping command.
+ *
+ * NOTE(review): __swait_wait_debug() reads and may write q->state before
+ * q->lock is taken.
+ */
+void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	unsigned long flags;
+
+	__swait_wait_debug(q, state);
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	__prepare_to_swait(q, wait);
+	set_current_state(state);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_swait);
+
+/*
+ * Signal-aware variant of prepare_to_swait(), used by ___swait_event().
+ *
+ * Returns -ERESTARTSYS, without touching the queue or the task state,
+ * when signal_pending_state(@state, current) is true; otherwise calls
+ * prepare_to_swait() and returns 0.
+ */
+long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	if (signal_pending_state(state, current))
+		return -ERESTARTSYS;
+
+	prepare_to_swait(q, wait, state);
+
+	return 0;
+}
+EXPORT_SYMBOL(prepare_to_swait_event);
+
+/*
+ * Unlocked counterpart of finish_swait(): mark the current task running
+ * and unlink @wait if it is still queued.  Takes no lock itself --
+ * presumably the caller must hold q->lock; confirm against callers.
+ */
+void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	__set_current_state(TASK_RUNNING);
+	if (!list_empty(&wait->task_list))
+		list_del_init(&wait->task_list);
+}
+
+/*
+ * End a wait: set the current task back to TASK_RUNNING and make sure
+ * @wait is no longer queued on @q.
+ */
+void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	unsigned long flags;
+
+	__set_current_state(TASK_RUNNING);
+
+	/*
+	 * The wake side unlinks the entry it wakes with list_del_init(), so
+	 * q->lock is only taken when @wait still looks queued.
+	 */
+	if (!list_empty_careful(&wait->task_list)) {
+		raw_spin_lock_irqsave(&q->lock, flags);
+		list_del_init(&wait->task_list);
+		raw_spin_unlock_irqrestore(&q->lock, flags);
+	}
+}
+EXPORT_SYMBOL(finish_swait);

  parent reply	other threads:[~2015-01-14 10:38 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-18  0:22 [PATCH v2 0/7] simple wait queue support (from -rt) Paul Gortmaker
2014-10-18  0:22 ` [PATCH 1/7] wait.h: mark complex wait functions to prepare for simple wait Paul Gortmaker
2014-10-18  0:22 ` [PATCH 2/7] wait.c: " Paul Gortmaker
2014-10-18  0:22 ` [PATCH 3/7] wait.[ch]: Introduce the simple waitqueue (swait) implementation Paul Gortmaker
2014-10-18 21:34   ` Peter Zijlstra
2014-10-18 23:05     ` Steven Rostedt
2014-10-20 15:21       ` Paul Gortmaker
2014-10-20 15:40         ` Steven Rostedt
2014-10-20 16:05           ` Paul Gortmaker
2014-10-20 16:47             ` Steven Rostedt
2014-10-20 13:44     ` Paul Gortmaker
2015-01-14 10:38   ` Peter Zijlstra [this message]
2015-01-14 14:18     ` Steven Rostedt
2015-01-14 14:31       ` Peter Zijlstra
2015-01-14 15:01         ` Steven Rostedt
2015-01-14 15:29           ` Peter Zijlstra
2014-10-18  0:22 ` [PATCH 4/7] sched/completion: convert completions to use simple wait queues Paul Gortmaker
2014-10-18  0:23 ` [PATCH 5/7] rcu: use simple wait queues where possible in rcutree Paul Gortmaker
2014-10-18  0:23 ` [PATCH 6/7] simplewait: don't run a possibly infinite number of wake under raw lock Paul Gortmaker
2014-10-18  0:23 ` [PATCH 7/7] simplewait: do we make barriers reflect what was in use in -rt? Paul Gortmaker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150114103834.GN23965@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.