[RFC][PATCH 2/5] sched: Add NEED_RESCHED to the preempt_count

linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>, Peter Anvin <hpa@zytor.com>,
	Mike Galbraith <bitbucket@online.de>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arjan van de Ven <arjan@linux.intel.com>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Peter Zijlstra <peterz@infradead.org>
Subject: [RFC][PATCH 2/5] sched: Add NEED_RESCHED to the preempt_count
Date: Wed, 14 Aug 2013 15:15:41 +0200	[thread overview]
Message-ID: <20130814133142.651343242@chello.nl> (raw)
In-Reply-To: 20130814131539.790947874@chello.nl

[-- Attachment #1: peterz-preempt_count-need_resched.patch --]
[-- Type: text/plain, Size: 7527 bytes --]

In order to combine the preemption and need_resched test we need to
fold the need_resched information into the preempt_count value.

We keep the existing TIF_NEED_RESCHED infrastructure in place but at 3
sites test it and fold its value into preempt_count; namely:

 - resched_task() when setting TIF_NEED_RESCHED on the current task
 - scheduler_ipi() when resched_task() sets TIF_NEED_RESCHED on a
                   remote task it follows it up with a reschedule IPI
                   and we can modify the cpu local preempt_count from
                   there.
 - cpu_idle_loop() for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched so that a 0 means
both need_resched and !atomic.

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched() to avoid
having to reload the preemption value and allow the compiler to use
the flags of the previuos decrement. I couldn't come up with any sane
reason for this barrier() to be there as preempt_enable_no_resched()
already has a barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-7io71l00ukk9a78unrglfldi@git.kernel.org
---
 include/linux/preempt.h     |   42 +++++++++++++++++++++++++++++++++++++-----
 include/linux/sched.h       |    2 +-
 include/linux/thread_info.h |    1 +
 kernel/context_tracking.c   |    3 +--
 kernel/cpu/idle.c           |   10 ++++++++++
 kernel/sched/core.c         |   20 ++++++++++++++------
 6 files changed, 64 insertions(+), 14 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,9 +10,19 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+/*
+ * We use the MSB mostly because its available; see <linux/hardirq.h> for
+ * the other bits.
+ */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count;
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline int *preempt_count_ptr(void)
@@ -20,6 +30,30 @@ static __always_inline int *preempt_coun
 	return &current_thread_info()->preempt_count;
 }
 
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
@@ -37,7 +71,7 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_check_resched() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule(); \
 } while (0)
 
@@ -47,7 +81,7 @@ void preempt_schedule_context(void);
 
 #define preempt_check_resched_context() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule_context(); \
 } while (0)
 #else
@@ -83,7 +117,6 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
-	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
@@ -111,7 +144,6 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	preempt_enable_no_resched_notrace(); \
-	barrier(); \
 	preempt_check_resched_context(); \
 } while (0)
 
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2405,7 +2405,7 @@ static inline int signal_pending_state(l
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_preempt_need_resched());
 }
 
 /*
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,6 +104,7 @@ static inline int test_ti_thread_flag(st
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
 
+#define test_need_resched()	test_thread_flag(TIF_NEED_RESCHED)
 #define set_need_resched()	set_thread_flag(TIF_NEED_RESCHED)
 #define clear_need_resched()	clear_thread_flag(TIF_NEED_RESCHED)
 
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -87,10 +87,9 @@ void user_enter(void)
  */
 void __sched notrace preempt_schedule_context(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_ctx;
 
-	if (likely(ti->preempt_count || irqs_disabled()))
+	if (likely(preempt_count() || irqs_disabled()))
 		return;
 
 	/*
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -106,6 +106,16 @@ static void cpu_idle_loop(void)
 				current_set_polling();
 			}
 			arch_cpu_idle_exit();
+			/*
+			 * We need to test and propagate the TIF_NEED_RESCHED
+			 * bit here because we might not have send the
+			 * reschedule IPI to idle tasks.
+			 *
+			 * XXX is there a better place in the generic idle code
+			 *     for this?
+			 */
+			if (test_need_resched())
+				set_preempt_need_resched();
 		}
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -526,8 +526,10 @@ void resched_task(struct task_struct *p)
 	set_tsk_need_resched(p);
 
 	cpu = task_cpu(p);
-	if (cpu == smp_processor_id())
+	if (cpu == smp_processor_id()) {
+		set_preempt_need_resched();
 		return;
+	}
 
 	/* NEED_RESCHED must be visible before we test polling */
 	smp_mb();
@@ -1411,6 +1413,14 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	if (test_need_resched())
+		set_preempt_need_resched();
+
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
 			&& !got_nohz_idle_kick())
@@ -2433,6 +2443,7 @@ static void __sched __schedule(void)
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
@@ -2515,13 +2526,11 @@ void __sched schedule_preempt_disabled(v
  */
 asmlinkage void __sched notrace preempt_schedule(void)
 {
-	struct thread_info *ti = current_thread_info();
-
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task. Just return..
 	 */
-	if (likely(ti->preempt_count || irqs_disabled()))
+	if (likely(preempt_count() || irqs_disabled()))
 		return;
 
 	do {
@@ -2546,11 +2555,10 @@ EXPORT_SYMBOL(preempt_schedule);
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
-	struct thread_info *ti = current_thread_info();
 	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
-	BUG_ON(ti->preempt_count || !irqs_disabled());
+	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();

next prev parent reply	other threads:[~2013-08-14 13:15 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-14 13:15 [RFC][PATCH 0/5] preempt_count rework Peter Zijlstra
2013-08-14 13:15 ` Peter Zijlstra
2013-08-14 13:15 ` [RFC][PATCH 1/5] sched: Introduce preempt_count accessor functions Peter Zijlstra
2013-08-14 13:15   ` Peter Zijlstra
2013-08-14 13:15 ` Peter Zijlstra [this message]
2013-08-14 13:15   ` [RFC][PATCH 2/5] sched: Add NEED_RESCHED to the preempt_count Peter Zijlstra
2013-08-14 13:15 ` [RFC][PATCH 3/5] sched, arch: Create asm/preempt.h Peter Zijlstra
2013-08-14 13:15   ` Peter Zijlstra
2013-08-14 13:15 ` [RFC][PATCH 4/5] sched: Create more preempt_count accessors Peter Zijlstra
2013-08-14 13:15   ` Peter Zijlstra
2013-08-14 13:15 ` [RFC][PATCH 5/5] sched, x86: Provide a per-cpu preempt_count implementation Peter Zijlstra
2013-08-14 13:47 ` [RFC][PATCH 0/5] preempt_count rework H. Peter Anvin
2013-08-14 15:39   ` Mike Galbraith
2013-08-14 15:39     ` Mike Galbraith
2013-08-14 15:43     ` H. Peter Anvin
2013-08-15  9:01       ` Mike Galbraith
2013-08-14 16:06     ` Peter Zijlstra
2013-08-14 16:14       ` H. Peter Anvin
2013-08-14 16:52         ` Peter Zijlstra
2013-08-14 16:58           ` H. Peter Anvin
2013-08-14 16:04   ` Peter Zijlstra
2013-08-14 17:31     ` Peter Zijlstra
2013-08-14 16:48 ` Andi Kleen
2013-08-14 16:48   ` Andi Kleen
2013-08-14 16:55   ` Peter Zijlstra
2013-08-14 17:12     ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130814133142.651343242@chello.nl \
    --to=peterz@infradead.org \
    --cc=ak@linux.intel.com \
    --cc=arjan@linux.intel.com \
    --cc=bitbucket@online.de \
    --cc=hpa@zytor.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).