All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>, Al Viro <viro@ZenIV.linux.org.uk>,
	Bart Van Assche <bvanassche@acm.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Neil Brown <neilb@suse.de>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/2] sched/wait: abort_exclusive_wait() should pass TASK_NORMAL to wake_up()
Date: Thu, 1 Sep 2016 13:39:19 +0200	[thread overview]
Message-ID: <20160901113919.GI10138@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <20160826124528.GA28904@redhat.com>

On Fri, Aug 26, 2016 at 02:45:28PM +0200, Oleg Nesterov wrote:
> Otherwise this logic only works if mode is "compatible" with another
> exclusive waiter.
> 
> If some wq has both TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE waiters,
> abort_exclusive_wait() won't wake an uninterruptible waiter.
> 
> The main user is __wait_on_bit_lock() and currently it is fine but only
> because TASK_KILLABLE includes TASK_UNINTERRUPTIBLE and we do not have
> lock_page_interruptible() yet.

So mixing INTERRUPTIBLE and UNINTERRUPTIBLE and then not using
TASK_NORMAL for wakeups is a mis-feature/abuse of waitqueues IMO.

That said, people do 'creative' things, so maybe we should add some
debug infrastructure to detect this mismatch.

Something like the below perhaps? It will miss people using the (old)
add_wait_queue() (which are plenty :/) but there's nothing quick we can
do about those.

Completely untested..

---
 include/linux/wait.h | 13 ++++++++++++-
 kernel/sched/wait.c  | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3ff74d764fa..e99ea720c5f9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -39,6 +39,9 @@ struct wait_bit_queue {
 struct __wait_queue_head {
 	spinlock_t		lock;
 	struct list_head	task_list;
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	unsigned int		state;
+#endif
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -48,6 +51,13 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)				\
+	.state = -1,
+#else
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)
+#endif
+
 #define __WAITQUEUE_INITIALIZER(name, tsk) {				\
 	.private	= tsk,						\
 	.func		= default_wake_function,			\
@@ -58,7 +68,8 @@ struct task_struct;
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+	.task_list	= { &(name).task_list, &(name).task_list },	\
+	__DEBUG_WAIT_QUEUE_HEAD_INIT(name) }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6a538a..cb71c56c5e76 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -16,6 +16,9 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c
 	spin_lock_init(&q->lock);
 	lockdep_set_class_and_name(&q->lock, key, name);
 	INIT_LIST_HEAD(&q->task_list);
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	q->state = -1;
+#endif
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
@@ -67,6 +70,16 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 {
 	wait_queue_t *curr, *next;
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	if (q->state != -1) {
+		/*
+		 * WARN if we have INTERRUPTIBLE and UNINTERRUPTIBLE
+		 * waiters and do not use TASK_NORMAL to wake.
+		 */
+		WARN_ON_ONCE(q->state != (mode & TASK_NORMAL));
+	}
+#endif
+
 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 		unsigned flags = curr->flags;
 
@@ -156,6 +169,17 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
+static inline void prepare_debug(wait_queue_head_t *q, int state)
+{
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	if (q->state == -1) {	/* -1 is the initial value: nothing recorded yet */
+		q->state = state & TASK_NORMAL;
+	} else {		/* accumulate the TASK_NORMAL bits seen so far */
+		q->state |= state & TASK_NORMAL;
+	}
+#endif
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
@@ -178,6 +202,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue(q, wait);
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -192,6 +217,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue_tail(q, wait);
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
@@ -214,6 +240,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 			__add_wait_queue(q, wait);
 	}
 	set_current_state(state);
+	prepare_debug(q, state);
 	spin_unlock_irqrestore(&q->lock, flags);
 
 	return 0;

  reply	other threads:[~2016-09-01 11:39 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-26 12:44 [PATCH 0/2] sched/wait: abort_exclusive_wait() should pass TASK_NORMAL to wake_up() Oleg Nesterov
2016-08-26 12:45 ` [PATCH 1/2] " Oleg Nesterov
2016-09-01 11:39   ` Peter Zijlstra [this message]
2016-09-01 17:26     ` Oleg Nesterov
2016-09-01 18:09       ` Peter Zijlstra
2016-08-26 12:45 ` [PATCH 2/2] sched/wait: avoid abort_exclusive_wait() in __wait_on_bit_lock() Oleg Nesterov
2016-08-26 12:47   ` Oleg Nesterov
2016-09-01 19:01   ` Peter Zijlstra
2016-09-01 19:08     ` Peter Zijlstra
2016-09-02 12:06       ` Oleg Nesterov
2016-09-01 22:17     ` Peter Zijlstra
2016-09-02 12:06       ` Oleg Nesterov
2016-09-02 13:20         ` Peter Zijlstra
2016-09-02 12:06     ` Oleg Nesterov
2016-09-01 11:03 ` [PATCH 0/2] sched/wait: abort_exclusive_wait() should pass TASK_NORMAL to wake_up() Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160901113919.GI10138@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bvanassche@acm.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=neilb@suse.de \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.