All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Andy Lutomirski <luto@amacapital.net>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <bitbucket@online.de>, X86 ML <x86@kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [RFC] sched: Add a new lockless wake-from-idle implementation
Date: Thu, 13 Feb 2014 21:16:18 +0100	[thread overview]
Message-ID: <20140213201618.GD14089@laptop.programming.kicks-ass.net> (raw)
In-Reply-To: <CALCETrWpDxt1xhBx=CWq-fwSBoCpbC_stoouY2mPX8u4u8XNhQ@mail.gmail.com>

On Thu, Feb 13, 2014 at 09:07:10AM -0800, Andy Lutomirski wrote:
> > I also don't really like how the polling state is an atomic; its a cpu
> > local property.
> 
> Your patch also makes polling state be an atomic (albeit one that
> isn't changed remotely).

Yah, sorry about that; I changed the email (and the code) around a number
of times before posting :/

> On the subject of major surgery, though: there are very few places in
> the kernel where TIF_NEED_RESCHED gets set.  With something like my
> patch applied, I think that there is no code at all that sets any
> other task's TIF_NEED_RESCHED.  That suggests that all
> set_tsk_need_resched callers could just call into the scheduler
> directly. 

One of the main callers would be the timer tick for local preemption;
that runs in interrupt context, where we can't call schedule(), so the
reschedule really needs to happen on the interrupt return path.

> If so, the change could probably delete a whole lot of
> assembly code, and every kernel exit would get faster.

We already need to load that word for all kinds of other things, like
delivering signals, so testing the one bit in the return path is
basically free.

Anyway, after all this mucking about, I finally remembered that Venki
once attempted something similar:

  https://lkml.org/lkml/2012/2/6/357

How about something like this?

---
 arch/x86/include/asm/mwait.h | 33 ++++++++++++++++--------
 arch/x86/kernel/process.c    |  2 ++
 arch/x86/kernel/smp.c        | 61 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5f96f9..cb7bb8bb6617 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_MWAIT_H
 #define _ASM_X86_MWAIT_H
 
+#include <linux/percpu.h>
 #include <linux/sched.h>
 
 #define MWAIT_SUBSTATE_MASK		0xf
@@ -15,6 +16,14 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 
+#define MWAIT_IPI_ENABLED		0x01
+#define MWAIT_IPI_RESCHED		0x02
+#define MWAIT_IPI_SINGLE		0x04
+
+extern void mwait_intercept_handler(void);
+
+DECLARE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+
 static inline void __monitor(const void *eax, unsigned long ecx,
 			     unsigned long edx)
 {
@@ -42,18 +51,20 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
  */
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	if (!current_set_polling_and_test()) {
-		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
-			mb();
-			clflush((void *)&current_thread_info()->flags);
-			mb();
-		}
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		if (!need_resched())
-			__mwait(eax, ecx);
+	unsigned int *ptr = this_cpu_ptr(&mwait_ipi);
+	unsigned int old = xchg(ptr, MWAIT_IPI_ENABLED);
+
+	WARN_ON_ONCE(old);
+
+	if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+		mb();
+		clflush((void *)ptr);
+		mb();
 	}
-	current_clr_polling();
+
+	__monitor((void *)ptr, 0, 0);
+	if (!(*ptr & ~MWAIT_IPI_ENABLED))
+		__mwait(eax, ecx);
 }
 
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..00afb2b676b8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
+#include <asm/mwait.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -286,6 +287,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle_exit(void)
 {
 	__exit_idle();
+	mwait_intercept_handler();
 }
 
 void arch_cpu_idle_dead(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7c3a5a61f2e4..4b078a8d6b83 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,8 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -31,6 +33,8 @@
 #include <asm/apic.h>
 #include <asm/nmi.h>
 #include <asm/trace/irq_vectors.h>
+#include <asm/mwait.h>
+
 /*
  *	Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -113,6 +117,56 @@
 static atomic_t stopping_cpu = ATOMIC_INIT(-1);
 static bool smp_no_nmi_ipi = false;
 
+DEFINE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+EXPORT_PER_CPU_SYMBOL_GPL(mwait_ipi);
+
+static bool mwait_intercept(int cpu, int ipi)
+{
+	u32 *ptr = &per_cpu(mwait_ipi, cpu);
+	u32 val, new, old;
+
+	if (!static_cpu_has(X86_FEATURE_MWAIT))
+		return false;
+
+	val = *ptr;
+	if (!(val & MWAIT_IPI_ENABLED))
+		return false;
+
+	for (;;) {
+		new = val | ipi;
+		old = cmpxchg(ptr, val, new);
+		if (old == val)
+			break;
+		val = old;
+	}
+
+	if (!(old & MWAIT_IPI_ENABLED))
+		return false;
+
+	return true;
+}
+
+void mwait_intercept_handler(void)
+{
+	unsigned int val, *ptr;
+
+	if (!static_cpu_has(X86_FEATURE_MWAIT))
+		return;
+
+	ptr = this_cpu_ptr(&mwait_ipi);
+	val = xchg(ptr, 0);
+
+	if (!(val & ~MWAIT_IPI_ENABLED))
+		return;
+
+	local_irq_disable();
+	if (val & MWAIT_IPI_RESCHED)
+		scheduler_ipi();
+	if (val & MWAIT_IPI_SINGLE)
+		generic_smp_call_function_single_interrupt();
+	local_irq_enable();
+}
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -124,12 +178,15 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+
+	if (!mwait_intercept(cpu, MWAIT_IPI_RESCHED))
+		apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	if (!mwait_intercept(cpu, MWAIT_IPI_SINGLE))
+		apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 void native_send_call_func_ipi(const struct cpumask *mask)

  reply	other threads:[~2014-02-13 20:49 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-11 20:23 Too many rescheduling interrupts (still!) Andy Lutomirski
2014-02-11 21:21 ` Thomas Gleixner
2014-02-11 22:34   ` Andy Lutomirski
2014-02-12 10:13     ` Peter Zijlstra
2014-02-12 15:49       ` Andy Lutomirski
2014-02-12 16:39         ` Peter Zijlstra
2014-02-12 18:19           ` Andy Lutomirski
2014-02-12 20:17             ` Peter Zijlstra
2014-02-13  1:40               ` [RFC] sched: Add a new lockless wake-from-idle implementation Andy Lutomirski
2014-02-13  9:38                 ` Ingo Molnar
2014-02-13 14:49                 ` Frederic Weisbecker
2014-02-13 14:50                 ` Peter Zijlstra
2014-02-13 17:07                   ` Andy Lutomirski
2014-02-13 20:16                     ` Peter Zijlstra [this message]
2014-02-13 20:35                       ` Andy Lutomirski
2014-02-13 19:58                   ` Andy Lutomirski
2014-02-14  1:38                     ` Andy Lutomirski
2014-02-14 20:01                   ` Andy Lutomirski
2014-02-14 20:17                     ` Andy Lutomirski
2014-02-14 21:19                       ` Peter Zijlstra
2014-02-12 15:59       ` Too many rescheduling interrupts (still!) Frederic Weisbecker
2014-02-12 16:43         ` Peter Zijlstra
2014-02-12 17:46           ` Frederic Weisbecker
2014-02-12 18:15             ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140213201618.GD14089@laptop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bitbucket@online.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.