* [RFC PATCH 2/3] restartable sequences: x86 ABI
       [not found] ` <20150624222609.6116.86035.stgit-tdHu5vqousHHt/MElyovVYaSKrA+ACpX0E9HWUfgJXw@public.gmane.org>
@ 2015-06-24 22:26   ` Paul Turner
       [not found]     ` <20150624222609.6116.30992.stgit-tdHu5vqousHHt/MElyovVYaSKrA+ACpX0E9HWUfgJXw@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Turner @ 2015-06-24 22:26 UTC (permalink / raw)
  To: Peter Zijlstra, Paul E. McKenney, Mathieu Desnoyers
  Cc: Andrew Hunter, Andi Kleen, Lai Jiangshan,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Steven Rostedt,
	Josh Triplett, Ingo Molnar, Andrew Morton, Andy Lutomirski,
	Linus Torvalds, Chris Lameter

Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
execution within restartable sequence sections.

With respect to the x86-specific ABI:
  On 32-bit:           Upon restart, the interrupted rip is placed in %ecx
  On 64-bit (or x32):  Upon restart, the interrupted rip is placed in %r10

While potentially surprising at first glance, this choice is strongly motivated
by the fact that the available scratch registers under the i386 function call
ABI overlap with those used as argument registers under x86_64.

Given that sequences are already personality specific and that we always want
the arguments to be available for sequence restart, it's much more natural to
ultimately differentiate the ABI in these two cases.
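
As an illustration, a minimal x86-64 user-space sketch built on this ABI
(the label names, the rseq_cpu TLS slot, and the registration step are
assumptions here, supplied by the accompanying patches; only the %r10
convention comes from this patch):

        /* long rseq_percpu_inc(long *counters): counters[cpu]++. */
                .globl  rseq_percpu_inc
        rseq_percpu_inc:
        rseq_crit_restart:      /* on restart, %r10 = interrupted rip; retry */
        rseq_crit_start:
                movl    %fs:rseq_cpu@tpoff, %eax /* kept current by kernel */
                movq    (%rdi,%rax,8), %rdx      /* load counters[cpu] */
                addq    $1, %rdx
                movq    %rdx, (%rdi,%rax,8)      /* commit: last store */
        rseq_crit_end:
                ret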

Signed-off-by: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/include/asm/restartable_sequences.h |   50 +++++++++++++++++++
 arch/x86/kernel/Makefile                     |    2 +
 arch/x86/kernel/restartable_sequences.c      |   69 ++++++++++++++++++++++++++
 arch/x86/kernel/signal.c                     |   12 +++++
 kernel/restartable_sequences.c               |   11 +++-
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/include/asm/restartable_sequences.h
 create mode 100644 arch/x86/kernel/restartable_sequences.c

diff --git a/arch/x86/include/asm/restartable_sequences.h b/arch/x86/include/asm/restartable_sequences.h
new file mode 100644
index 0000000..0ceb024
--- /dev/null
+++ b/arch/x86/include/asm/restartable_sequences.h
@@ -0,0 +1,50 @@
+#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
+#define _ASM_X86_RESTARTABLE_SEQUENCES_H
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+
+static inline bool arch_rseq_in_crit_section(struct task_struct *p,
+					     struct pt_regs *regs)
+{
+	struct task_struct *leader = p->group_leader;
+	struct restartable_sequence_state *rseq_state = &leader->rseq_state;
+
+	unsigned long ip = (unsigned long)regs->ip;
+	if (unlikely(ip < (unsigned long)rseq_state->crit_end &&
+		     ip >= (unsigned long)rseq_state->crit_start))
+		return true;
+
+	return false;
+}
+
+static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
+{
+#ifdef CONFIG_PREEMPT
+	/*
+	 * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
+	 * case that we took an interrupt during syscall entry.  Avoid this by
+	 * always deferring to our notify-resume handler.
+	 */
+	return true;
+#else
+	return arch_rseq_in_crit_section(p, task_pt_regs(p));
+#endif
+}
+
+void arch_rseq_handle_notify_resume(struct pt_regs *regs);
+void arch_rseq_check_critical_section(struct task_struct *p,
+				      struct pt_regs *regs);
+
+#else /* !CONFIG_RESTARTABLE_SEQUENCES */
+
+static inline void arch_rseq_handle_notify_resume(struct pt_regs *regs) {}
+static inline void arch_rseq_check_critical_section(struct task_struct *p,
+						    struct pt_regs *regs) {}
+
+#endif
+
+#endif /* _ASM_X86_RESTARTABLE_SEQUENCES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index febaf18..bd7827d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -113,6 +113,8 @@ obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_IOSF_MBI)			+= iosf_mbi.o
 obj-$(CONFIG_PMC_ATOM)			+= pmc_atom.o
 
+obj-$(CONFIG_RESTARTABLE_SEQUENCES)	+= restartable_sequences.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/restartable_sequences.c b/arch/x86/kernel/restartable_sequences.c
new file mode 100644
index 0000000..3b38013
--- /dev/null
+++ b/arch/x86/kernel/restartable_sequences.c
@@ -0,0 +1,69 @@
+/*
+ * Restartable Sequences: x86 ABI.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2015, Google, Inc.,
+ * Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> and Andrew Hunter <ahh-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <asm/restartable_sequences.h>
+
+void arch_rseq_check_critical_section(struct task_struct *p,
+				      struct pt_regs *regs)
+{
+	if (!arch_rseq_in_crit_section(p, regs))
+		return;
+
+	/* RSEQ only applies to user-mode execution */
+	BUG_ON(!user_mode(regs));
+
+	/*
+	 * The ABI is slightly different for {32,64}-bit threads on x86
+	 *
+	 * Short version:
+	 *   x86-64 (or x32): interrupted rip => %r10
+	 *   i386:            interrupted rip => %ecx
+	 *
+	 * Longer version:
+	 * The scratch registers available under the i386 function call ABI
+	 * overlap with those used by argument registers under the x86_64 ABI.
+	 *
+	 * Given that the sequence block is already personality specific in
+	 * that it must be entered by 'call' and that we always want the
+	 * arguments available for a sequence restart; it's more natural to
+	 * differentiate the ABI in these two cases.
+	 */
+	if (unlikely(test_tsk_thread_flag(p, TIF_IA32)))
+		regs->cx = regs->ip; /* i386 */
+	else
+		regs->r10 = regs->ip; /* x86-64/x32 */
+
+	regs->ip = (unsigned long)p->group_leader->rseq_state.crit_restart;
+}
+
+void arch_rseq_handle_notify_resume(struct pt_regs *regs)
+{
+	struct restartable_sequence_state *rseq_state = &current->rseq_state;
+
+	/* If this update fails our user-state is incoherent. */
+	if (put_user(task_cpu(current), rseq_state->cpu_pointer))
+		force_sig(SIGSEGV, current);
+
+	arch_rseq_check_critical_section(current, regs);
+}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 206996c..987c50b 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -31,6 +31,7 @@
 #include <asm/vdso.h>
 #include <asm/mce.h>
 #include <asm/sighandling.h>
+#include <asm/restartable_sequences.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
@@ -617,6 +618,15 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	sigset_t *set = sigmask_to_save();
 	compat_sigset_t *cset = (compat_sigset_t *) set;
 
+	/*
+	 * If we are executing in the critical section of a restartable
+	 * sequence we need to fix up the user's stack saved ip at this point
+	 * so that signal handler return does not allow us to jump back into
+	 * the block across a context switch boundary.
+	 */
+	if (rseq_active(current))
+		arch_rseq_check_critical_section(current, regs);
+
 	/* Set up the stack frame */
 	if (is_ia32_frame()) {
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
@@ -755,6 +765,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (rseq_active(current))
+			arch_rseq_handle_notify_resume(regs);
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
diff --git a/kernel/restartable_sequences.c b/kernel/restartable_sequences.c
index 72945f2..9102241 100644
--- a/kernel/restartable_sequences.c
+++ b/kernel/restartable_sequences.c
@@ -24,17 +24,22 @@
 
 #ifdef CONFIG_RESTARTABLE_SEQUENCES
 
+#include <asm/restartable_sequences.h>
 #include <linux/uaccess.h>
 #include <linux/preempt.h>
 #include <linux/syscalls.h>
 
 static void rseq_sched_in_nop(struct preempt_notifier *pn, int cpu) {}
-static void rseq_sched_out_nop(struct preempt_notifier *pn,
-			       struct task_struct *next) {}
+static void rseq_sched_out(struct preempt_notifier *pn,
+			   struct task_struct *next)
+{
+	if (arch_rseq_needs_notify_resume(current))
+		set_thread_flag(TIF_NOTIFY_RESUME);
+}
 
 static __read_mostly struct preempt_ops rseq_preempt_ops = {
 	.sched_in = rseq_sched_in_nop,
-	.sched_out = rseq_sched_out_nop,
+	.sched_out = rseq_sched_out,
 };
 
 int rseq_register_cpu_pointer_current(int __user *cpu_pointer)

* Re: [RFC PATCH 2/3] restartable sequences: x86 ABI
       [not found]     ` <20150624222609.6116.30992.stgit-tdHu5vqousHHt/MElyovVYaSKrA+ACpX0E9HWUfgJXw@public.gmane.org>
@ 2015-06-26 18:09       ` Mathieu Desnoyers
       [not found]         ` <1050218158.4054.1435342186284.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Mathieu Desnoyers @ 2015-06-26 18:09 UTC (permalink / raw)
  To: Paul Turner
  Cc: Peter Zijlstra, Paul E. McKenney, Andrew Hunter, Andi Kleen,
	Lai Jiangshan, linux-api, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	rostedt, Josh Triplett, Ingo Molnar, Andrew Morton,
	Andy Lutomirski, Linus Torvalds, Chris Lameter

----- On Jun 24, 2015, at 6:26 PM, Paul Turner pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org wrote:

> Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
> execution within restartable sequence sections.
> 
> With respect to the x86-specific ABI:
>  On 32-bit:           Upon restart, the interrupted rip is placed in %ecx
>  On 64-bit (or x32):  Upon restart, the interrupted rip is placed in %r10
> 
> While potentially surprising at first glance, this choice is strongly motivated
> by the fact that the available scratch registers under the i386 function call
> ABI overlap with those used as argument registers under x86_64.
> 
> Given that sequences are already personality specific and that we always want
> the arguments to be available for sequence restart, it's much more natural to
> ultimately differentiate the ABI in these two cases.
> 
> Signed-off-by: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> ---
> arch/x86/include/asm/restartable_sequences.h |   50 +++++++++++++++++++
> arch/x86/kernel/Makefile                     |    2 +
> arch/x86/kernel/restartable_sequences.c      |   69 ++++++++++++++++++++++++++
> arch/x86/kernel/signal.c                     |   12 +++++
> kernel/restartable_sequences.c               |   11 +++-
> 5 files changed, 141 insertions(+), 3 deletions(-)
> create mode 100644 arch/x86/include/asm/restartable_sequences.h
> create mode 100644 arch/x86/kernel/restartable_sequences.c
> 
> diff --git a/arch/x86/include/asm/restartable_sequences.h
> b/arch/x86/include/asm/restartable_sequences.h
> new file mode 100644
> index 0000000..0ceb024
> --- /dev/null
> +++ b/arch/x86/include/asm/restartable_sequences.h
> @@ -0,0 +1,50 @@
> +#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
> +#define _ASM_X86_RESTARTABLE_SEQUENCES_H
> +
> +#include <asm/processor.h>
> +#include <asm/ptrace.h>
> +#include <linux/sched.h>
> +
> +#ifdef CONFIG_RESTARTABLE_SEQUENCES
> +
> +static inline bool arch_rseq_in_crit_section(struct task_struct *p,
> +					     struct pt_regs *regs)
> +{
> +	struct task_struct *leader = p->group_leader;
> +	struct restartable_sequence_state *rseq_state = &leader->rseq_state;
> +
> +	unsigned long ip = (unsigned long)regs->ip;
> +	if (unlikely(ip < (unsigned long)rseq_state->crit_end &&
> +		     ip >= (unsigned long)rseq_state->crit_start))
> +		return true;
> +
> +	return false;
> +}
> +
> +static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
> +{
> +#ifdef CONFIG_PREEMPT
> +	/*
> +	 * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
> +	 * case that we took an interrupt during syscall entry.  Avoid this by
> +	 * always deferring to our notify-resume handler.
> +	 */
> +	return true;

I'm a bit puzzled about this. If I look at perf_get_regs_user() in the perf
code, task_pt_regs() seems to return the user-space pt_regs for a task with
a current->mm set (iow, not a kernel thread), even if an interrupt nests on
top of a system call. The only corner-case is NMIs, where an NMI may interrupt
in the middle of setting up the task pt_regs, but scheduling should never happen
there, right ?

Since it's impossible for kernel threads to have an rseq critical section,
we should be able to assume that whenever task_pt_regs() returns a
non-userspace pt_regs (user_mode(regs) == 0), scheduling is dealing
with a kernel thread. Therefore, following this line of thought,
arch_rseq_in_crit_section() should work for CONFIG_PREEMPT kernels too.
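
In code, that line of thought would amount to something like this
(untested sketch):

        static inline bool
        arch_rseq_needs_notify_resume(struct task_struct *p)
        {
                struct pt_regs *regs = task_pt_regs(p);

                /* Kernel threads never have an rseq critical section. */
                if (!user_mode(regs))
                        return false;

                return arch_rseq_in_crit_section(p, regs);
        }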

So what am I missing here?

Thanks,

Mathieu

> +#else
> +	return arch_rseq_in_crit_section(p, task_pt_regs(p));
> +#endif
> +}
> +
> +void arch_rseq_handle_notify_resume(struct pt_regs *regs);
> +void arch_rseq_check_critical_section(struct task_struct *p,
> +				      struct pt_regs *regs);
> +
> +#else /* !CONFIG_RESTARTABLE_SEQUENCES */
> +
> +static inline void arch_rseq_handle_notify_resume(struct pt_regs *regs) {}
> +static inline void arch_rseq_check_critical_section(struct task_struct *p,
> +						    struct pt_regs *regs) {}
> +
> +#endif
> +
> +#endif /* _ASM_X86_RESTARTABLE_SEQUENCES_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index febaf18..bd7827d 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -113,6 +113,8 @@ obj-$(CONFIG_TRACING)			+= tracepoint.o
> obj-$(CONFIG_IOSF_MBI)			+= iosf_mbi.o
> obj-$(CONFIG_PMC_ATOM)			+= pmc_atom.o
> 
> +obj-$(CONFIG_RESTARTABLE_SEQUENCES)	+= restartable_sequences.o
> +
> ###
> # 64 bit specific files
> ifeq ($(CONFIG_X86_64),y)
> diff --git a/arch/x86/kernel/restartable_sequences.c
> b/arch/x86/kernel/restartable_sequences.c
> new file mode 100644
> index 0000000..3b38013
> --- /dev/null
> +++ b/arch/x86/kernel/restartable_sequences.c
> @@ -0,0 +1,69 @@
> +/*
> + * Restartable Sequences: x86 ABI.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
> + *
> + * Copyright (C) 2015, Google, Inc.,
> + * Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> and Andrew Hunter <ahh-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> + *
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/uaccess.h>
> +#include <asm/restartable_sequences.h>
> +
> +void arch_rseq_check_critical_section(struct task_struct *p,
> +				      struct pt_regs *regs)
> +{
> +	if (!arch_rseq_in_crit_section(p, regs))
> +		return;
> +
> +	/* RSEQ only applies to user-mode execution */
> +	BUG_ON(!user_mode(regs));
> +
> +	/*
> +	 * The ABI is slightly different for {32,64}-bit threads on x86
> +	 *
> +	 * Short version:
> +	 *   x86-64 (or x32): interrupted rip => %r10
> +	 *   i386:            interrupted rip => %ecx
> +	 *
> +	 * Longer version:
> +	 * The scratch registers available under the i386 function call ABI
> +	 * overlap with those used by argument registers under the x86_64 ABI.
> +	 *
> +	 * Given that the sequence block is already personality specific in
> +	 * that it must be entered by 'call' and that we always want the
> +	 * arguments available for a sequence restart; it's more natural to
> +	 * differentiate the ABI in these two cases.
> +	 */
> +	if (unlikely(test_tsk_thread_flag(p, TIF_IA32)))
> +		regs->cx = regs->ip; /* i386 */
> +	else
> +		regs->r10 = regs->ip; /* x86-64/x32 */
> +
> +	regs->ip = (unsigned long)p->group_leader->rseq_state.crit_restart;
> +}
> +
> +void arch_rseq_handle_notify_resume(struct pt_regs *regs)
> +{
> +	struct restartable_sequence_state *rseq_state = &current->rseq_state;
> +
> +	/* If this update fails our user-state is incoherent. */
> +	if (put_user(task_cpu(current), rseq_state->cpu_pointer))
> +		force_sig(SIGSEGV, current);
> +
> +	arch_rseq_check_critical_section(current, regs);
> +}
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 206996c..987c50b 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -31,6 +31,7 @@
> #include <asm/vdso.h>
> #include <asm/mce.h>
> #include <asm/sighandling.h>
> +#include <asm/restartable_sequences.h>
> 
> #ifdef CONFIG_X86_64
> #include <asm/proto.h>
> @@ -617,6 +618,15 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> 	sigset_t *set = sigmask_to_save();
> 	compat_sigset_t *cset = (compat_sigset_t *) set;
> 
> +	/*
> +	 * If we are executing in the critical section of a restartable
> +	 * sequence we need to fix up the user's stack saved ip at this point
> +	 * so that signal handler return does not allow us to jump back into
> +	 * the block across a context switch boundary.
> +	 */
> +	if (rseq_active(current))
> +		arch_rseq_check_critical_section(current, regs);
> +
> 	/* Set up the stack frame */
> 	if (is_ia32_frame()) {
> 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
> @@ -755,6 +765,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32
> thread_info_flags)
> 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> 		clear_thread_flag(TIF_NOTIFY_RESUME);
> 		tracehook_notify_resume(regs);
> +		if (rseq_active(current))
> +			arch_rseq_handle_notify_resume(regs);
> 	}
> 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
> 		fire_user_return_notifiers();
> diff --git a/kernel/restartable_sequences.c b/kernel/restartable_sequences.c
> index 72945f2..9102241 100644
> --- a/kernel/restartable_sequences.c
> +++ b/kernel/restartable_sequences.c
> @@ -24,17 +24,22 @@
> 
> #ifdef CONFIG_RESTARTABLE_SEQUENCES
> 
> +#include <asm/restartable_sequences.h>
> #include <linux/uaccess.h>
> #include <linux/preempt.h>
> #include <linux/syscalls.h>
> 
> static void rseq_sched_in_nop(struct preempt_notifier *pn, int cpu) {}
> -static void rseq_sched_out_nop(struct preempt_notifier *pn,
> -			       struct task_struct *next) {}
> +static void rseq_sched_out(struct preempt_notifier *pn,
> +			   struct task_struct *next)
> +{
> +	if (arch_rseq_needs_notify_resume(current))
> +		set_thread_flag(TIF_NOTIFY_RESUME);
> +}
> 
> static __read_mostly struct preempt_ops rseq_preempt_ops = {
> 	.sched_in = rseq_sched_in_nop,
> -	.sched_out = rseq_sched_out_nop,
> +	.sched_out = rseq_sched_out,
> };
> 
>  int rseq_register_cpu_pointer_current(int __user *cpu_pointer)

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

* Re: [RFC PATCH 2/3] restartable sequences: x86 ABI
       [not found]         ` <1050218158.4054.1435342186284.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
@ 2015-06-26 19:04           ` Mathieu Desnoyers
  2015-06-26 19:31           ` Andy Lutomirski
  1 sibling, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2015-06-26 19:04 UTC (permalink / raw)
  To: Paul Turner
  Cc: Peter Zijlstra, Paul E. McKenney, Andrew Hunter, Andi Kleen,
	Lai Jiangshan, linux-api, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	rostedt, Josh Triplett, Ingo Molnar, Andrew Morton,
	Andy Lutomirski, Linus Torvalds, Chris Lameter

----- On Jun 26, 2015, at 2:09 PM, Mathieu Desnoyers mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org wrote:

> ----- On Jun 24, 2015, at 6:26 PM, Paul Turner pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org wrote:
> 
>> Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
>> execution within restartable sequence sections.
>> 
>> With respect to the x86-specific ABI:
>>  On 32-bit:           Upon restart, the interrupted rip is placed in %ecx
>>  On 64-bit (or x32):  Upon restart, the interrupted rip is placed in %r10
>> 
>> While potentially surprising at first glance, this choice is strongly motivated
>> by the fact that the available scratch registers under the i386 function call
>> ABI overlap with those used as argument registers under x86_64.
>> 
>> Given that sequences are already personality specific and that we always want
>> the arguments to be available for sequence restart, it's much more natural to
>> ultimately differentiate the ABI in these two cases.
>> 
>> Signed-off-by: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> ---
>> arch/x86/include/asm/restartable_sequences.h |   50 +++++++++++++++++++
>> arch/x86/kernel/Makefile                     |    2 +
>> arch/x86/kernel/restartable_sequences.c      |   69 ++++++++++++++++++++++++++
>> arch/x86/kernel/signal.c                     |   12 +++++
>> kernel/restartable_sequences.c               |   11 +++-
>> 5 files changed, 141 insertions(+), 3 deletions(-)
>> create mode 100644 arch/x86/include/asm/restartable_sequences.h
>> create mode 100644 arch/x86/kernel/restartable_sequences.c
>> 
>> diff --git a/arch/x86/include/asm/restartable_sequences.h
>> b/arch/x86/include/asm/restartable_sequences.h
>> new file mode 100644
>> index 0000000..0ceb024
>> --- /dev/null
>> +++ b/arch/x86/include/asm/restartable_sequences.h
>> @@ -0,0 +1,50 @@
>> +#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
>> +#define _ASM_X86_RESTARTABLE_SEQUENCES_H
>> +
>> +#include <asm/processor.h>
>> +#include <asm/ptrace.h>
>> +#include <linux/sched.h>
>> +
>> +#ifdef CONFIG_RESTARTABLE_SEQUENCES
>> +
>> +static inline bool arch_rseq_in_crit_section(struct task_struct *p,
>> +					     struct pt_regs *regs)
>> +{
>> +	struct task_struct *leader = p->group_leader;
>> +	struct restartable_sequence_state *rseq_state = &leader->rseq_state;
>> +
>> +	unsigned long ip = (unsigned long)regs->ip;
>> +	if (unlikely(ip < (unsigned long)rseq_state->crit_end &&
>> +		     ip >= (unsigned long)rseq_state->crit_start))
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> +static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
>> +{
>> +#ifdef CONFIG_PREEMPT
>> +	/*
>> +	 * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
>> +	 * case that we took an interrupt during syscall entry.  Avoid this by
>> +	 * always deferring to our notify-resume handler.
>> +	 */
>> +	return true;
> 
> I'm a bit puzzled about this. If I look at perf_get_regs_user() in the perf
> code, task_pt_regs() seems to return the user-space pt_regs for a task with
> a current->mm set (iow, not a kernel thread), even if an interrupt nests on
> top of a system call. The only corner-case is NMIs, where an NMI may interrupt
> in the middle of setting up the task pt_regs, but scheduling should never happen
> there, right ?
> 
> Since it's impossible for kernel threads to have an rseq critical section,
> we should be able to assume that whenever task_pt_regs() returns a
> non-userspace pt_regs (user_mode(regs) == 0), scheduling is dealing
> with a kernel thread. Therefore, following this line of thought,
> arch_rseq_in_crit_section() should work for CONFIG_PREEMPT kernels too.
> 
> So what am I missing here?

AFAIU, the comment near this check in perf_get_regs_user() is bogus:
the race does not only apply to NMIs, but also to normal interrupt
handlers that nest over the stack setup on syscall entry (below
entry_SYSCALL_64_after_swapgs in entry_64.S):

        struct pt_regs *user_regs = task_pt_regs(current);

        /*
         * If we're in an NMI that interrupted task_pt_regs setup, then
         * we can't sample user regs at all.  This check isn't really
         * sufficient, though, as we could be in an NMI inside an interrupt
         * that happened during task_pt_regs setup.
         */
        if (regs->sp > (unsigned long)&user_regs->r11 &&
            regs->sp <= (unsigned long)(user_regs + 1)) {
                regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
                regs_user->regs = NULL;
                return;
        }

That is how those races are avoided for tracing. Losing the odd
sample once in a while might not be a huge issue for perf, but I
understand that this statistical approach would be incorrect in the
context of RSEQ.

Moving ENABLE_INTERRUPTS(CLBR_NONE) 3 instructions down in
entry_SYSCALL_64_after_swapgs, just after
pushq   %rcx                            /* pt_regs->ip */
might solve your issue here.
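
Roughly (a sketch against the 4.2-era entry_64.S; the exact
surrounding code may since have shifted):

        /* Construct struct pt_regs on stack */
        pushq   $__USER_DS                      /* pt_regs->ss */
        pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
        pushq   %r11                            /* pt_regs->flags */
        pushq   $__USER_CS                      /* pt_regs->cs */
        pushq   %rcx                            /* pt_regs->ip */
        ENABLE_INTERRUPTS(CLBR_NONE)            /* moved: ->ip now saved */

so that an interrupt can only nest once the saved user ->ip is
already in pt_regs.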

Thoughts ?

Thanks,

Mathieu


> 
> Thanks,
> 
> Mathieu
> 
>> +#else
>> +	return arch_rseq_in_crit_section(p, task_pt_regs(p));
>> +#endif
>> +}
>> +
>> +void arch_rseq_handle_notify_resume(struct pt_regs *regs);
>> +void arch_rseq_check_critical_section(struct task_struct *p,
>> +				      struct pt_regs *regs);
>> +
>> +#else /* !CONFIG_RESTARTABLE_SEQUENCES */
>> +
>> +static inline void arch_rseq_handle_notify_resume(struct pt_regs *regs) {}
>> +static inline void arch_rseq_check_critical_section(struct task_struct *p,
>> +						    struct pt_regs *regs) {}
>> +
>> +#endif
>> +
>> +#endif /* _ASM_X86_RESTARTABLE_SEQUENCES_H */
>> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>> index febaf18..bd7827d 100644
>> --- a/arch/x86/kernel/Makefile
>> +++ b/arch/x86/kernel/Makefile
>> @@ -113,6 +113,8 @@ obj-$(CONFIG_TRACING)			+= tracepoint.o
>> obj-$(CONFIG_IOSF_MBI)			+= iosf_mbi.o
>> obj-$(CONFIG_PMC_ATOM)			+= pmc_atom.o
>> 
>> +obj-$(CONFIG_RESTARTABLE_SEQUENCES)	+= restartable_sequences.o
>> +
>> ###
>> # 64 bit specific files
>> ifeq ($(CONFIG_X86_64),y)
>> diff --git a/arch/x86/kernel/restartable_sequences.c
>> b/arch/x86/kernel/restartable_sequences.c
>> new file mode 100644
>> index 0000000..3b38013
>> --- /dev/null
>> +++ b/arch/x86/kernel/restartable_sequences.c
>> @@ -0,0 +1,69 @@
>> +/*
>> + * Restartable Sequences: x86 ABI.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>> + *
>> + * Copyright (C) 2015, Google, Inc.,
>> + * Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> and Andrew Hunter <ahh-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> + *
>> + */
>> +
>> +#include <linux/sched.h>
>> +#include <linux/uaccess.h>
>> +#include <asm/restartable_sequences.h>
>> +
>> +void arch_rseq_check_critical_section(struct task_struct *p,
>> +				      struct pt_regs *regs)
>> +{
>> +	if (!arch_rseq_in_crit_section(p, regs))
>> +		return;
>> +
>> +	/* RSEQ only applies to user-mode execution */
>> +	BUG_ON(!user_mode(regs));
>> +
>> +	/*
>> +	 * The ABI is slightly different for {32,64}-bit threads on x86
>> +	 *
>> +	 * Short version:
>> +	 *   x86-64 (or x32): interrupted rip => %r10
>> +	 *   i386:            interrupted rip => %ecx
>> +	 *
>> +	 * Longer version:
>> +	 * The scratch registers available under the i386 function call ABI
>> +	 * overlap with those used by argument registers under the x86_64 ABI.
>> +	 *
>> +	 * Given that the sequence block is already personality specific in
>> +	 * that it must be entered by 'call' and that we always want the
>> +	 * arguments available for a sequence restart; it's more natural to
>> +	 * differentiate the ABI in these two cases.
>> +	 */
>> +	if (unlikely(test_tsk_thread_flag(p, TIF_IA32)))
>> +		regs->cx = regs->ip; /* i386 */
>> +	else
>> +		regs->r10 = regs->ip; /* x86-64/x32 */
>> +
>> +	regs->ip = (unsigned long)p->group_leader->rseq_state.crit_restart;
>> +}
>> +
>> +void arch_rseq_handle_notify_resume(struct pt_regs *regs)
>> +{
>> +	struct restartable_sequence_state *rseq_state = &current->rseq_state;
>> +
>> +	/* If this update fails our user-state is incoherent. */
>> +	if (put_user(task_cpu(current), rseq_state->cpu_pointer))
>> +		force_sig(SIGSEGV, current);
>> +
>> +	arch_rseq_check_critical_section(current, regs);
>> +}
>> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
>> index 206996c..987c50b 100644
>> --- a/arch/x86/kernel/signal.c
>> +++ b/arch/x86/kernel/signal.c
>> @@ -31,6 +31,7 @@
>> #include <asm/vdso.h>
>> #include <asm/mce.h>
>> #include <asm/sighandling.h>
>> +#include <asm/restartable_sequences.h>
>> 
>> #ifdef CONFIG_X86_64
>> #include <asm/proto.h>
>> @@ -617,6 +618,15 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
>> 	sigset_t *set = sigmask_to_save();
>> 	compat_sigset_t *cset = (compat_sigset_t *) set;
>> 
>> +	/*
>> +	 * If we are executing in the critical section of a restartable
>> +	 * sequence we need to fix up the user's stack saved ip at this point
>> +	 * so that signal handler return does not allow us to jump back into
>> +	 * the block across a context switch boundary.
>> +	 */
>> +	if (rseq_active(current))
>> +		arch_rseq_check_critical_section(current, regs);
>> +
>> 	/* Set up the stack frame */
>> 	if (is_ia32_frame()) {
>> 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
>> @@ -755,6 +765,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32
>> thread_info_flags)
>> 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
>> 		clear_thread_flag(TIF_NOTIFY_RESUME);
>> 		tracehook_notify_resume(regs);
>> +		if (rseq_active(current))
>> +			arch_rseq_handle_notify_resume(regs);
>> 	}
>> 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
>> 		fire_user_return_notifiers();
>> diff --git a/kernel/restartable_sequences.c b/kernel/restartable_sequences.c
>> index 72945f2..9102241 100644
>> --- a/kernel/restartable_sequences.c
>> +++ b/kernel/restartable_sequences.c
>> @@ -24,17 +24,22 @@
>> 
>> #ifdef CONFIG_RESTARTABLE_SEQUENCES
>> 
>> +#include <asm/restartable_sequences.h>
>> #include <linux/uaccess.h>
>> #include <linux/preempt.h>
>> #include <linux/syscalls.h>
>> 
>> static void rseq_sched_in_nop(struct preempt_notifier *pn, int cpu) {}
>> -static void rseq_sched_out_nop(struct preempt_notifier *pn,
>> -			       struct task_struct *next) {}
>> +static void rseq_sched_out(struct preempt_notifier *pn,
>> +			   struct task_struct *next)
>> +{
>> +	if (arch_rseq_needs_notify_resume(current))
>> +		set_thread_flag(TIF_NOTIFY_RESUME);
>> +}
>> 
>> static __read_mostly struct preempt_ops rseq_preempt_ops = {
>> 	.sched_in = rseq_sched_in_nop,
>> -	.sched_out = rseq_sched_out_nop,
>> +	.sched_out = rseq_sched_out,
>> };
>> 
>>  int rseq_register_cpu_pointer_current(int __user *cpu_pointer)
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

* Re: [RFC PATCH 2/3] restartable sequences: x86 ABI
       [not found]         ` <1050218158.4054.1435342186284.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
  2015-06-26 19:04           ` Mathieu Desnoyers
@ 2015-06-26 19:31           ` Andy Lutomirski
       [not found]             ` <CALCETrWKzP8UPH2OEmwbC4egcAa6NA+VkQD6OuA-LhFv-Aqg6Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-06-26 19:31 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Paul Turner, Peter Zijlstra, Paul E. McKenney, Andrew Hunter,
	Andi Kleen, Lai Jiangshan, linux-api,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, rostedt,
	Josh Triplett, Ingo Molnar, Andrew Morton, Linus Torvalds,
	Chris Lameter

On Fri, Jun 26, 2015 at 11:09 AM, Mathieu Desnoyers
<mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org> wrote:
> ----- On Jun 24, 2015, at 6:26 PM, Paul Turner pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org wrote:
>
>> Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
>> execution within restartable sequence sections.
>>
>> With respect to the x86-specific ABI:
>>  On 32-bit:           Upon restart, the interrupted rip is placed in %ecx
>>  On 64-bit (or x32):  Upon restart, the interrupted rip is placed in %r10
>>
>> While potentially surprising at first glance, this choice is strongly motivated
>> by the fact that the available scratch registers under the i386 function call
>> ABI overlap with those used as argument registers under x86_64.
>>
>> Given that sequences are already personality specific and that we always want
>> the arguments to be available for sequence restart, it's much more natural to
>> ultimately differentiate the ABI in these two cases.
>>
>> Signed-off-by: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> ---
>> arch/x86/include/asm/restartable_sequences.h |   50 +++++++++++++++++++
>> arch/x86/kernel/Makefile                     |    2 +
>> arch/x86/kernel/restartable_sequences.c      |   69 ++++++++++++++++++++++++++
>> arch/x86/kernel/signal.c                     |   12 +++++
>> kernel/restartable_sequences.c               |   11 +++-
>> 5 files changed, 141 insertions(+), 3 deletions(-)
>> create mode 100644 arch/x86/include/asm/restartable_sequences.h
>> create mode 100644 arch/x86/kernel/restartable_sequences.c
>>
>> diff --git a/arch/x86/include/asm/restartable_sequences.h
>> b/arch/x86/include/asm/restartable_sequences.h
>> new file mode 100644
>> index 0000000..0ceb024
>> --- /dev/null
>> +++ b/arch/x86/include/asm/restartable_sequences.h
>> @@ -0,0 +1,50 @@
>> +#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
>> +#define _ASM_X86_RESTARTABLE_SEQUENCES_H
>> +
>> +#include <asm/processor.h>
>> +#include <asm/ptrace.h>
>> +#include <linux/sched.h>
>> +
>> +#ifdef CONFIG_RESTARTABLE_SEQUENCES
>> +
>> +static inline bool arch_rseq_in_crit_section(struct task_struct *p,
>> +                                          struct pt_regs *regs)
>> +{
>> +     struct task_struct *leader = p->group_leader;
>> +     struct restartable_sequence_state *rseq_state = &leader->rseq_state;
>> +
>> +     unsigned long ip = (unsigned long)regs->ip;
>> +     if (unlikely(ip < (unsigned long)rseq_state->crit_end &&
>> +                  ip >= (unsigned long)rseq_state->crit_start))
>> +             return true;
>> +
>> +     return false;
>> +}
>> +
>> +static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
>> +{
>> +#ifdef CONFIG_PREEMPT
>> +     /*
>> +      * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
>> +      * case that we took an interrupt during syscall entry.  Avoid this by
>> +      * always deferring to our notify-resume handler.
>> +      */
>> +     return true;
>
> I'm a bit puzzled about this. If I look at perf_get_regs_user() in the perf
> code, task_pt_regs() seems to return the user-space pt_regs for a task with
> a current->mm set (iow, not a kernel thread), even if an interrupt nests on
> top of a system call. The only corner-case is NMIs, where an NMI may interrupt
> in the middle of setting up the task pt_regs, but scheduling should never happen
> there, right ?

Careful here!  task_pt_regs returns a pointer to the place where regs
would be if they were fully initialized.  We can certainly take an
interrupt in the middle of pt_regs setup (entry_SYSCALL_64 enables
interrupts very early, for example).  To me, the question is whether
we can ever be preemptible at such a time.

It's a bit worse, though: we can certainly be preemptible when other
code is accessing pt_regs.  clone, execve, sigreturn, and signal
delivery come to mind.

Why don't we give up on poking at user state from the scheduler and do
it on exit to user mode instead?  Starting in 4.3 (hopefully landing
in -tip in a week or two), we should have a nice function
prepare_exit_to_usermode that runs with well-defined state,
non-reentrantly, that can do whatever you want here, *including user
memory access*.

The remaining question would be what the ABI should be.

Could we get away with a vDSO function along the lines of "set *A=B
and *X=Y if we're on cpu N and *X=Z"?  Straight-up cmpxchg would be
even simpler.
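
Sketch of the kind of primitive I mean (all names made up):

        /*
         * If the calling thread is running on @cpu and *@x == @z, then
         * atomically (w.r.t. preemption) do *@a = @b and *@x = @y and
         * return 0; otherwise return -EAGAIN so the caller can re-read
         * its cpu and retry.
         */
        int __vdso_percpu_cmpxchg_pair(int cpu, long *a, long b,
                                       long *x, long z, long y);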

--Andy

* Re: [RFC PATCH 2/3] restartable sequences: x86 ABI
       [not found]             ` <CALCETrWKzP8UPH2OEmwbC4egcAa6NA+VkQD6OuA-LhFv-Aqg6Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-06-27  1:33               ` Paul Turner
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Turner @ 2015-06-27  1:33 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Mathieu Desnoyers, Peter Zijlstra, Paul E. McKenney,
	Andrew Hunter, Andi Kleen, Lai Jiangshan, linux-api,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, rostedt,
	Josh Triplett, Ingo Molnar, Andrew Morton, Linus Torvalds,
	Chris Lameter

On Fri, Jun 26, 2015 at 12:31 PM, Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> wrote:
> On Fri, Jun 26, 2015 at 11:09 AM, Mathieu Desnoyers
> <mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org> wrote:
>> ----- On Jun 24, 2015, at 6:26 PM, Paul Turner pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org wrote:
>>
>>> Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
>>> execution within restartable sequence sections.
>>>
>>> With respect to the x86-specific ABI:
>>>  On 32-bit:           Upon restart, the interrupted rip is placed in %ecx
>>>  On 64-bit (or x32):  Upon restart, the interrupted rip is placed in %r10
>>>
>>> While potentially surprising at first glance, this choice is strongly motivated
>>> by the fact that the available scratch registers under the i386 function call
>>> ABI overlap with those used as argument registers under x86_64.
>>>
>>> Given that sequences are already personality specific and that we always want
>>> the arguments to be available for sequence restart, it's much more natural to
>>> ultimately differentiate the ABI in these two cases.
>>>
>>> Signed-off-by: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>>> ---
>>> arch/x86/include/asm/restartable_sequences.h |   50 +++++++++++++++++++
>>> arch/x86/kernel/Makefile                     |    2 +
>>> arch/x86/kernel/restartable_sequences.c      |   69 ++++++++++++++++++++++++++
>>> arch/x86/kernel/signal.c                     |   12 +++++
>>> kernel/restartable_sequences.c               |   11 +++-
>>> 5 files changed, 141 insertions(+), 3 deletions(-)
>>> create mode 100644 arch/x86/include/asm/restartable_sequences.h
>>> create mode 100644 arch/x86/kernel/restartable_sequences.c
>>>
>>> diff --git a/arch/x86/include/asm/restartable_sequences.h
>>> b/arch/x86/include/asm/restartable_sequences.h
>>> new file mode 100644
>>> index 0000000..0ceb024
>>> --- /dev/null
>>> +++ b/arch/x86/include/asm/restartable_sequences.h
>>> @@ -0,0 +1,50 @@
>>> +#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
>>> +#define _ASM_X86_RESTARTABLE_SEQUENCES_H
>>> +
>>> +#include <asm/processor.h>
>>> +#include <asm/ptrace.h>
>>> +#include <linux/sched.h>
>>> +
>>> +#ifdef CONFIG_RESTARTABLE_SEQUENCES
>>> +
>>> +static inline bool arch_rseq_in_crit_section(struct task_struct *p,
>>> +                                          struct pt_regs *regs)
>>> +{
>>> +     struct task_struct *leader = p->group_leader;
>>> +     struct restartable_sequence_state *rseq_state = &leader->rseq_state;
>>> +
>>> +     unsigned long ip = (unsigned long)regs->ip;
>>> +     if (unlikely(ip < (unsigned long)rseq_state->crit_end &&
>>> +                  ip >= (unsigned long)rseq_state->crit_start))
>>> +             return true;
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
>>> +{
>>> +#ifdef CONFIG_PREEMPT
>>> +     /*
>>> +      * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
>>> +      * case that we took an interrupt during syscall entry.  Avoid this by
>>> +      * always deferring to our notify-resume handler.
>>> +      */
>>> +     return true;
>>
>> I'm a bit puzzled about this. If I look at perf_get_regs_user() in the perf
>> code, task_pt_regs() seems to return the user-space pt_regs for a task with
>> a current->mm set (iow, not a kernel thread), even if an interrupt nests on
>> top of a system call. The only corner-case is NMIs, where an NMI may interrupt
>> in the middle of setting up the task pt_regs, but scheduling should never happen
>> there, right ?
>
> Careful, here!  task_pt_regs returns a pointer to the place where regs
> would be if they were fully initialized.  We can certainly take an
> interrupt in the middle of pt_regs setup (entry_SYSCALL_64 enables
> interrupts very early, for example).  To me, the question is whether
> we can ever be preemptable at such a time.
>
> It's a bit worse, though: we can certainly be preemptible when other
> code is accessing pt_regs.  clone, execve, sigreturn, and signal
> delivery come to mind.

Yeah Andy covered it exactly: interrupt in pt_regs setup.

With respect to whether we can be preemptible: I think we were
concerned about rescheduling during syscall entry, but I'd have to
re-audit the current state of entry_64.S :)

Mathieu also wrote:
> Moving ENABLE_INTERRUPTS(CLBR_NONE) 3 instructions down, just after
> pushq   %rcx                            /* pt_regs->ip */
> might solve your issue here. (in entry_SYSCALL_64_after_swapgs)

We considered doing something exactly like this, but I think any
potential changes here should be made independently of this series.

>
> Why don't we give up on poking at user state from the scheduler and do
> it on exit to user mode instead?  Starting in 4.3 (hopefully landing
> in -tip in a week or two), we should have a nice function
> prepare_exit_to_usermode that runs with well-defined state,
> non-reentrantly, that can do whatever you want here, *including user
> memory access*.

So this series already does almost exactly that:
The only thing we touch in the scheduler is looking at the kernel copy
of pt_regs, in the case where we know it's safe to.

The entirety of *any* poking at user state (both the current-cpu
pointer updates and potential rip manipulation) happens in the
exit-to-user path via TIF_NOTIFY_RESUME.


>
> The remaining question would be what the ABI should be.
>
> Could we get away with a vDSO function along the lines of "set *A=B
> and *X=Y if we're on cpu N and *X=Z"?  Straight-up cmpxchg would be
> even simpler.

The short answer is yes [*]; but I don't think it should live in the vDSO.

a) vDSO call overhead is fairly high.
b) I don't think there are any properties of being in the vDSO that we
benefit from.
c) It would be nice if these sequences were inlinable.

I have an alternate implementation that satisfies (c) which I'm
looking to propose early next week (I've got it baking on some tests
over the weekend).

[*]  I mean that the very simplest implementation (taking this patch
and putting the implementation of the critical section in the vDSO)
is clearly sufficient.


* [RFC PATCH 0/3] restartable sequences benchmarks
@ 2015-10-22 18:06 Dave Watson
       [not found] ` <cover.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Dave Watson @ 2015-10-22 18:06 UTC (permalink / raw)
  To: davejwatson-b10kYP2dOMg, kernel-team-b10kYP2dOMg,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, pjt-hpIqsD4AKlfQT0dZR+AlfA,
	mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w

We've been testing out restartable sequences + malloc changes for use
at Facebook.  Below are some test results, as well as some possible
changes based on Paul Turner's original patches:

https://lkml.org/lkml/2015/6/24/665

I ran one service with several permutations of various mallocs.  The
service is CPU-bound, and hits the allocator quite hard.  Requests/s
are held constant at the source, so we use cpu idle time and latency
as indicators of service quality. These are average numbers over
several hours.  Machines were dual E5-2660, total 16 cores +
hyperthreading.  This service has ~400 total threads, 70-90 of which
are doing work at any particular time.

                                   RSS CPUIDLE LATENCYMS
jemalloc 4.0.0                     31G   33%     390
jemalloc + this patch              25G   33%     390
jemalloc + this patch using lsl    25G   30%     420
jemalloc + PT's rseq patch         25G   32%     405
glibc malloc 2.20                  27G   30%     420
tcmalloc gperftools trunk (2.2)    21G   30%     480

jemalloc rseq patch used for testing:
https://github.com/djwatson/jemalloc

lsl test - using the lsl segment limit to get the cpu (i.e. an inlined
vdso getcpu on x86) instead of the thread caching in this patch.
There have been some suggestions to add the thread-cached getcpu()
feature separately.  It does seem to move the needle in a real service
by ~3% to have a thread-cached getcpu vs. not.  I don't think we
can use restartable sequences in production without a faster getcpu.
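
For reference, the lsl trick is essentially the vdso __getcpu()
inlined; a sketch (0x7b is x86-64's __PER_CPU_SEG selector, whose
segment limit encodes node << 12 | cpu):

        static inline int getcpu_lsl(void)
        {
                unsigned int p;

                /* Read the limit of the per-cpu GDT segment. */
                asm ("lsl %1, %0" : "=r" (p) : "r" (0x7bU));
                return p & 0xfff;       /* low 12 bits: cpu number */
        }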

GS-segment / migration only tests

There's been some interest in seeing if we can do this with only a gs
segment; here are some numbers for that.  This doesn't have to be gs:
it could just as well be a migration signal sent to userspace, and the
same approaches would apply.
GS patch: https://lkml.org/lkml/2014/9/13/59

                                   RSS CPUIDLE LATENCYMS
jemalloc 4.0.0                     31G   33%     390
jemalloc + percpu locking          25G   25%     420
jemalloc + preempt lock / signal   25G   32%     415

* Percpu locking - just lock everything percpu all the time.  If
  scheduled off during the critical section, other threads have to
  wait.

* The 'preempt lock' idea is that we grab a lock, but if we miss the
  lock, we send a signal to the offending thread (its tid is stored in
  the lock variable) to restart its critical section.  Libunwind was
  used to fix up ips in the signal handler, walking all the frames.
  This is slower than the kernel preempt check, but happens less often -
  only if there was a preempt during the critical section.  Critical
  sections were inlined using the same scheme as in this patch.  There
  is more overhead than restartable sequences in the hot path (an extra
  unlocked cmpxchg, some accounting).  Microbenchmarks showed it was 2x
  slower than rseq, but still faster than atomics.

  Roughly like this (see also the sketch after this list):
  https://gist.github.com/djwatson/9c268681a0dfa797990c

* I also tried a percpu version of stm (software transactional
  memory), but could never write anything better than ~3x slower than
  atomics in a microbenchmark.  I didn't test this in a real service.
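
The rough shape of the preempt-lock fast path mentioned above (a
sketch; the names are made up, and the real version, with its
accounting, is in the gist):

        #include <signal.h>
        #include <unistd.h>
        #include <sys/syscall.h>

        #define MAX_CPUS 4096

        /* One lock word per cpu: 0 = free, else the holder's tid. */
        static int percpu_lock[MAX_CPUS];

        static void preempt_lock(int cpu, int my_tid)
        {
                int owner;

                while ((owner = __sync_val_compare_and_swap(
                                        &percpu_lock[cpu], 0, my_tid))) {
                        /*
                         * The holder was scheduled off inside its
                         * critical section: signal it so its handler
                         * (libunwind in our tests) can rewind the saved
                         * ip to the restart point.
                         */
                        syscall(SYS_tgkill, getpid(), owner, SIGUSR1);
                }
        }

        static void preempt_unlock(int cpu)
        {
                __sync_lock_release(&percpu_lock[cpu]);
        }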

Attached are two changes to the original patch:

1) Support more than one critical memory range in the kernel, using
   binary search (a sketch of the lookup follows this list).  This has
   several advantages:

  * We don't need an extra register ABI to support multiplexing them
    in userspace.  This also avoids some complexity knowing which
    registers/flags might be smashed by a restart.

  * There are no collisions between shared libraries

  * They can be inlined with gcc inline asm.  With optimization on,
    gcc correctly inlines and registers many more regions.  In a real
    service this does seem to improve latency a hair; a microbenchmark
    shows a ~20% speedup.

Downsides:  Less control over how we search/jump to the regions, but I
didn't notice any difference in testing a reasonable number of regions
(less than 100).  We could set a max limit?

2) Additional checks in ptrace to single step over critical sections.
   We also prevent setting breakpoints, as these also seem to confuse
   gdb sometimes.
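
The lookup sketched (the real implementation lives in patch 1/3's
kernel/restartable_sequences.c; whether the state hangs off the group
leader, as in Paul's series, is an assumption here) is a plain rbtree
walk keyed on the section bounds, returning the restart address or 0:

        unsigned long rseq_lookup(struct task_struct *p, unsigned long ip)
        {
                struct restartable_sequence_state *rseq_state =
                        &p->group_leader->rseq_state;
                struct rb_node *node = rseq_state->sections.rb_node;

                while (node) {
                        struct restartable_sequence_section *s =
                                rb_entry(node,
                                         struct restartable_sequence_section,
                                         node);

                        if (ip < (unsigned long)s->crit_start)
                                node = node->rb_left;
                        else if (ip >= (unsigned long)s->crit_end)
                                node = node->rb_right;
                        else
                                return (unsigned long)s->crit_restart;
                }
                return 0;
        }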

Dave Watson (3):
  restartable sequences: user-space per-cpu critical sections
  restartable sequences: x86 ABI
  restartable sequences: basic user-space self-tests

 arch/Kconfig                                       |   7 +
 arch/x86/Kconfig                                   |   1 +
 arch/x86/entry/common.c                            |   3 +
 arch/x86/entry/syscalls/syscall_64.tbl             |   1 +
 arch/x86/include/asm/restartable_sequences.h       |  44 +++
 arch/x86/kernel/Makefile                           |   2 +
 arch/x86/kernel/ptrace.c                           |   6 +-
 arch/x86/kernel/restartable_sequences.c            |  47 +++
 arch/x86/kernel/signal.c                           |  12 +-
 fs/exec.c                                          |   3 +-
 include/linux/sched.h                              |  39 +++
 include/uapi/asm-generic/unistd.h                  |   4 +-
 init/Kconfig                                       |   9 +
 kernel/Makefile                                    |   2 +-
 kernel/fork.c                                      |   1 +
 kernel/ptrace.c                                    |  15 +-
 kernel/restartable_sequences.c                     | 255 ++++++++++++++++
 kernel/sched/core.c                                |   5 +
 kernel/sched/sched.h                               |   3 +
 kernel/sys_ni.c                                    |   3 +
 tools/testing/selftests/rseq/Makefile              |  14 +
 .../testing/selftests/rseq/basic_percpu_ops_test.c | 331 +++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.c                |  48 +++
 tools/testing/selftests/rseq/rseq.h                |  17 ++
 24 files changed, 862 insertions(+), 10 deletions(-)
 create mode 100644 arch/x86/include/asm/restartable_sequences.h
 create mode 100644 arch/x86/kernel/restartable_sequences.c
 create mode 100644 kernel/restartable_sequences.c
 create mode 100644 tools/testing/selftests/rseq/Makefile
 create mode 100644 tools/testing/selftests/rseq/basic_percpu_ops_test.c
 create mode 100644 tools/testing/selftests/rseq/rseq.c
 create mode 100644 tools/testing/selftests/rseq/rseq.h

-- 
2.4.6

* [RFC PATCH 1/3] restartable sequences: user-space per-cpu critical sections
       [not found] ` <cover.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
@ 2015-10-22 18:06   ` Dave Watson
       [not found]     ` <581c0b6403285219724961a3d250c6d95dfacea7.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Dave Watson @ 2015-10-22 18:06 UTC (permalink / raw)
  To: davejwatson-b10kYP2dOMg, kernel-team-b10kYP2dOMg,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, pjt-hpIqsD4AKlfQT0dZR+AlfA,
	mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w

Introduce the notion of 'restartable sequence'.  This is a user-defined range
within which we guarantee user-execution will occur serially with respect
to scheduling events such as migration or competition with other threads.

Preemption, or other interruption within this region, results in control being
transferred to a user-defined restart handler when rescheduled.  This handler
may arrange for the original operation to be retried, including potentially
resynchronizing with dependent state that may have been updated in the interim.

This may be used in combination with an in-memory cpu-id to allow user programs
to implement cpu-local data-structures and primitives, without the use/overhead
of any atomics.

The kernel ABI generally consists of:
- A critical region, with start, end and restart addresses
- A (per-thread) memory location which will be kept current with its cpu

The definition of the above is performed via a new syscall,
  SYSCALL_DEFINE5(restartable_sequences,
                  int, op, int, flags, long, val1, long, val2, long, val3)

There are currently 2 possible operations,
  1) Configure the critical region(s)
  2) Configure the per-thread cpu pointer

[ See kernel/restartable_sequences.c for full documentation ]

A thread that has not configured (2) will not be restarted when executing in
(1).

This patch introduces the general framework for configuration, as well as
exposing the syscall.  We minimally expose x86 as having support (even though
the actual ABI is added by a subsequent patch) so that this can be compile
tested in isolation.

Ptrace is modified to avoid setting a breakpoint in the critical region,
since doing so would always restart the critical section, and may
not work correctly if the breakpoint is also the restart address.
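
For illustration, per-thread setup could look roughly like this (the op
constants and the val1..val3 layout are stand-ins for this sketch; the
authoritative definitions are in kernel/restartable_sequences.c):

        #include <unistd.h>
        #include <sys/syscall.h>

        #define RSEQ_OP_CRITICAL        0       /* stand-in value */
        #define RSEQ_OP_CPU_POINTER     1       /* stand-in value */

        static __thread volatile int rseq_cpu = -1;
        extern char crit_start[], crit_end[], crit_restart[];

        static int rseq_thread_init(void)
        {
                /* 1) Register the critical region (process-wide). */
                if (syscall(__NR_restartable_sequences, RSEQ_OP_CRITICAL,
                            0, (long)crit_start, (long)crit_end,
                            (long)crit_restart))
                        return -1;

                /* 2) Register this thread's cpu cache; restarts are only
                 *    enabled for threads that have done this. */
                return syscall(__NR_restartable_sequences,
                               RSEQ_OP_CPU_POINTER, 0,
                               (long)&rseq_cpu, 0, 0);
        }
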
---
 arch/Kconfig                      |   7 ++
 arch/x86/Kconfig                  |   1 +
 fs/exec.c                         |   3 +-
 include/linux/sched.h             |  39 ++++++
 include/uapi/asm-generic/unistd.h |   4 +-
 init/Kconfig                      |   9 ++
 kernel/Makefile                   |   2 +-
 kernel/fork.c                     |   1 +
 kernel/ptrace.c                   |  15 ++-
 kernel/restartable_sequences.c    | 250 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c               |   5 +
 kernel/sched/sched.h              |   3 +
 kernel/sys_ni.c                   |   3 +
 13 files changed, 335 insertions(+), 7 deletions(-)
 create mode 100644 kernel/restartable_sequences.c

diff --git a/arch/Kconfig b/arch/Kconfig
index 4e949e5..93c18fa 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -241,6 +241,13 @@ config HAVE_REGS_AND_STACK_ACCESS_API
 	  declared in asm/ptrace.h
 	  For example the kprobes-based event tracer needs this API.
 
+config HAVE_RESTARTABLE_SEQUENCE_SUPPORT
+	bool
+	depends on HAVE_REGS_AND_STACK_ACCESS_API
+	help
+	  This symbol should be selected by an architecture if it supports an
+	  implementation of restartable sequences.
+
 config HAVE_CLK
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96d058a..865e795 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,6 +112,7 @@ config X86
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK	if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_RESTARTABLE_SEQUENCE_SUPPORT
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZ4
diff --git a/fs/exec.c b/fs/exec.c
index b06623a..aa94834 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
  * current->executable is only used by the procfs.  This allows a dispatch
  * table to check for several different types  of binary formats.  We keep
  * trying until we recognize the file or we run out of supported binary
- * formats. 
+ * formats.
  */
 
 #include <linux/slab.h>
@@ -1596,6 +1596,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	current->in_execve = 0;
 	acct_update_integrals(current);
 	task_numa_free(current);
+	rseq_clear_state_exec(current);
 	free_bprm(bprm);
 	kfree(pathbuf);
 	putname(filename);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b7b9501..a7b6e24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1182,6 +1182,31 @@ struct mempolicy;
 struct pipe_inode_info;
 struct uts_namespace;
 
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+struct restartable_sequence_section {
+	/* Start and end of an address space's critical section. */
+	struct rb_node node;
+	void __user *crit_start, __user *crit_end, __user *crit_restart;
+};
+struct restartable_sequence_state {
+	struct rb_root sections;
+	/* Thread's current CPU, typically in TLS. */
+	int __user *cpu_pointer;
+	struct preempt_notifier notifier;
+};
+
+void rseq_clear_state_exec(struct task_struct *p);
+unsigned long rseq_lookup(struct task_struct *p, unsigned long ip);
+#else
+static inline void rseq_clear_state_exec(struct task_struct *p) {}
+static inline void rseq_fork(struct task_struct *p) {}
+static inline unsigned long
+rseq_lookup(struct task_struct *p, unsigned long ip)
+{
+	return 0;
+}
+#endif
+
 struct load_weight {
 	unsigned long weight;
 	u32 inv_weight;
@@ -1811,6 +1836,11 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long	task_state_change;
 #endif
+
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+	struct restartable_sequence_state rseq_state;
+#endif
+
 	int pagefault_disabled;
 /* CPU-specific state of this task */
 	struct thread_struct thread;
@@ -3180,4 +3210,13 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+static inline int rseq_active(struct task_struct *p)
+{
+	return p->rseq_state.cpu_pointer != NULL;
+}
+#else
+static inline int rseq_active(struct task_struct *p) { return 0; }
+#endif
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index ee12400..9659f31 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -713,9 +713,11 @@ __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 283
 __SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_restartable_sequences 284
+__SYSCALL(__NR_restartable_sequences, sys_restartable_sequences)
 
 #undef __NR_syscalls
-#define __NR_syscalls 284
+#define __NR_syscalls 285
 
 /*
  * All syscalls below here should go away really,
diff --git a/init/Kconfig b/init/Kconfig
index c24b6f7..9b4a180 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2042,6 +2042,15 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
+config RESTARTABLE_SEQUENCES
+	bool "Userspace Restartable Sequences (RSEQ)"
+	default n
+	depends on HAVE_RESTARTABLE_SEQUENCE_SUPPORT && PREEMPT_NOTIFIERS
+	help
+	  Allows binaries to define a region of user-text within which
+	  execution will be restarted in the event of signal delivery or
+	  preemption.
+
 config PADATA
 	depends on SMP
 	bool
diff --git a/kernel/Makefile b/kernel/Makefile
index 53abf00..dbe6963 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -101,8 +101,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
 obj-$(CONFIG_MEMBARRIER) += membarrier.o
-
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
+obj-$(CONFIG_RESTARTABLE_SEQUENCES) += restartable_sequences.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 2845623..aa3ba1e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -252,6 +252,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(tsk == current);
 
 	task_numa_free(tsk);
+	rseq_clear_state_exec(tsk);
 	security_task_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 787320d..63935bc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -825,7 +825,10 @@ int ptrace_request(struct task_struct *child, long request,
 		return generic_ptrace_peekdata(child, addr, data);
 	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
-		return generic_ptrace_pokedata(child, addr, data);
+		/* Don't breakpoint restartable sequences */
+		if (!rseq_lookup(child, addr))
+			return generic_ptrace_pokedata(child, addr, data);
+		break;
 
 #ifdef PTRACE_OLDSETOPTIONS
 	case PTRACE_OLDSETOPTIONS:
@@ -1116,7 +1119,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 	compat_ulong_t __user *datap = compat_ptr(data);
 	compat_ulong_t word;
 	siginfo_t siginfo;
-	int ret;
+	int ret = -EIO;
 
 	switch (request) {
 	case PTRACE_PEEKTEXT:
@@ -1130,8 +1133,12 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
-		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
-		ret = (ret != sizeof(data) ? -EIO : 0);
+		/* Don't breakpoint restartable sequences */
+		if (!rseq_lookup(child, addr)) {
+			ret = access_process_vm(
+				child, addr, &data, sizeof(data), 1);
+			ret = (ret != sizeof(data) ? -EIO : 0);
+		}
 		break;
 
 	case PTRACE_GETEVENTMSG:
diff --git a/kernel/restartable_sequences.c b/kernel/restartable_sequences.c
new file mode 100644
index 0000000..72cfa9b
--- /dev/null
+++ b/kernel/restartable_sequences.c
@@ -0,0 +1,250 @@
+/*
+ * Restartable Sequences are a lightweight interface that allows user-level
+ * code to be executed atomically relative to scheduler preemption.  Typically
+ * used for implementing per-cpu operations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015, Google, Inc.,
+ * Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> and Andrew Hunter <ahh-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
+ *
+ */
+
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+
+#include <linux/uaccess.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+static void rseq_sched_in_nop(struct preempt_notifier *pn, int cpu) {}
+static void rseq_sched_out_nop(struct preempt_notifier *pn,
+			       struct task_struct *next) {}
+
+static __read_mostly struct preempt_ops rseq_preempt_ops = {
+	.sched_in = rseq_sched_in_nop,
+	.sched_out = rseq_sched_out_nop,
+};
+
+unsigned long rseq_lookup(struct task_struct *p, unsigned long ip)
+{
+	struct task_struct *leader = p->group_leader;
+	struct restartable_sequence_state *rseq_state = &leader->rseq_state;
+	struct restartable_sequence_section *item;
+
+	struct rb_node *node = rseq_state->sections.rb_node;
+
+	while (node) {
+		item = container_of(
+			node, struct restartable_sequence_section, node);
+		if (ip < (unsigned long)item->crit_start)
+			node = node->rb_left;
+		else if (ip >= (unsigned long)item->crit_end)
+			node = node->rb_right;
+		else
+			return (unsigned long)item->crit_restart;
+	}
+
+	return 0;
+}
+
+int rseq_register_cpu_pointer(struct task_struct *p, int __user *cpu_pointer)
+{
+	struct restartable_sequence_state *rseq_state =
+		&p->rseq_state;
+	int registered = 0, rc = 0;
+
+	if (cpu_pointer == rseq_state->cpu_pointer)
+		return 0;
+
+	if (cpu_pointer && !access_ok(VERIFY_WRITE, cpu_pointer, sizeof(int)))
+		return -EINVAL;
+
+	rcu_read_lock();
+	/* Group leader always holds critical section definition. */
+	if (cpu_pointer && !current->group_leader->rseq_state.cpu_pointer &&
+		current->group_leader != p) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	smp_rmb();  /* Pairs with setting group_leader's cpu_pointer */
+
+	if (rseq_state->cpu_pointer)
+		registered = 1;
+	rseq_state->cpu_pointer = cpu_pointer;
+
+	if (cpu_pointer && !registered) {
+		preempt_notifier_inc();
+
+		preempt_notifier_init(&rseq_state->notifier,
+				      &rseq_preempt_ops);
+		preempt_notifier_register(&rseq_state->notifier);
+	} else if (!cpu_pointer && registered) {
+		preempt_notifier_unregister(&rseq_state->notifier);
+
+		preempt_notifier_dec();
+	}
+
+	/* Will update *cpu_pointer on return. */
+	if (cpu_pointer)
+		set_thread_flag(TIF_NOTIFY_RESUME);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return rc;
+}
+
+void rseq_clear_state_exec(struct task_struct *task)
+{
+	struct restartable_sequence_section *section;
+	struct rb_node *node;
+
+	/* Ensure notifier is disabled. */
+	rseq_register_cpu_pointer(task, NULL);
+
+	/* Free and reinit */
+	while ((node = rb_first(&task->rseq_state.sections))) {
+		section = rb_entry(node,
+				struct restartable_sequence_section, node);
+		rb_erase(&section->node, &task->rseq_state.sections);
+		kfree(section);
+	}
+
+	memset(&task->rseq_state, 0, sizeof(task->rseq_state));
+	task->rseq_state.sections = RB_ROOT;
+}
+
+static DEFINE_MUTEX(rseq_state_mutex);
+
+int rseq_register_critical_current(void __user *start, void __user *end,
+				void __user *restart)
+{
+	struct restartable_sequence_state *rseq_state;
+	struct restartable_sequence_section *section;
+	struct rb_node **new, *parent = NULL;
+	int rc = 0;
+
+	rcu_read_lock();
+	/* The critical section is shared by all threads in a process. */
+	rseq_state = &current->group_leader->rseq_state;
+
+	/* Verify section */
+	if (start >= end) {
+		rc = -EINVAL;
+		goto out_rcu;
+	}
+
+	if (!access_ok(VERIFY_READ, start, end - start) ||
+		!access_ok(VERIFY_READ, restart, 1)) {
+		rc = -EINVAL;
+		goto out_rcu;
+	}
+
+	if (rseq_state->cpu_pointer) {
+		rc = -EBUSY;
+		goto out_rcu;
+	}
+
+	new = &(rseq_state->sections.rb_node);
+
+	section = kmalloc(
+		sizeof(struct restartable_sequence_section), GFP_KERNEL);
+	if (!section) {
+		rc = -ENOMEM;
+		goto out_rcu;
+	}
+	section->crit_end = end;
+	section->crit_start = start;
+	section->crit_restart = restart;
+
+	mutex_lock(&rseq_state_mutex);
+
+	while (*new) {
+		struct restartable_sequence_section *this = container_of(
+			*new, struct restartable_sequence_section, node);
+
+		parent = *new;
+		if (section->crit_end <= this->crit_start)
+			new = &((*new)->rb_left);
+		else if (section->crit_start >= this->crit_end)
+			new = &((*new)->rb_right);
+		else {
+			/* Prevent overlapping regions */
+			kfree(section);
+			rc = -EBUSY;
+			goto out_lock;
+		}
+	}
+
+	rb_link_node(&section->node, parent, new);
+	rb_insert_color(&section->node, &rseq_state->sections);
+
+out_lock:
+	mutex_unlock(&rseq_state_mutex);
+out_rcu:
+
+	smp_wmb();  /* synchronize visibility of new section */
+
+	rcu_read_unlock();
+	return rc;
+}
+
+#define SYS_RSEQ_SET_CRITICAL		0
+#define SYS_RSEQ_SET_CPU_POINTER	1
+
+/*
+ * RSEQ syscall interface.
+ *
+ * Usage:
+ *   SYS_RSEQ_SET_CRITICAL, flags, crit_start, crit_end, crit_restart
+ *    A thread with user rip in [crit_start, crit_end) that has called
+ *    RSEQ_SET_CPU_POINTER will have its execution resumed at crit_restart
+ *    when interrupted by preemption or signal.
+ *
+ *   SYS_RSEQ_SET_CPU_POINTER, flags, cpu_pointer_address
+ *    Configures a (typically per-thread) value, containing the cpu which that
+ *    thread is currently executing on.
+ *    REQUIRES: SYS_RSEQ_SET_CRITICAL must have previously been called.
+ *
+ *  flags is currently unused.
+ */
+SYSCALL_DEFINE5(restartable_sequences,
+		int, op, int, flags, long, val1, long, val2, long, val3)
+{
+	int rc = -EINVAL;
+
+	if (op == SYS_RSEQ_SET_CRITICAL) {
+		/* Defines (process-wide) critical section. */
+		void __user *crit_start = (void __user *)val1;
+		void __user *crit_end = (void __user *)val2;
+		void __user *crit_restart = (void __user *)val3;
+
+		rc = rseq_register_critical_current(
+			crit_start, crit_end, crit_restart);
+	} else if (op == SYS_RSEQ_SET_CPU_POINTER) {
+		/*
+		 * Enables RSEQ for this thread; sets location for CPU update
+		 * to val1.
+		 */
+		int __user *cpu = (int __user *)val1;
+
+		rc = rseq_register_cpu_pointer(current, cpu);
+	}
+
+	return rc;
+}
+#else
+SYSCALL_DEFINE0(restartable_sequences)
+{
+	return -ENOSYS;
+}
+#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 10a8faa..1e192f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2112,6 +2112,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	p->numa_group = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
+
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+	memset(&p->rseq_state, 0, sizeof(p->rseq_state));
+	p->rseq_state.sections = RB_ROOT;
+#endif
 }
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6d2a119..c7fb1a6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -953,6 +953,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
 	set_task_rq(p, cpu);
 #ifdef CONFIG_SMP
+	if (rseq_active(p))
+		set_tsk_thread_flag(p, TIF_NOTIFY_RESUME);
+
 	/*
 	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 	 * successfuly executed on another CPU. We must ensure that updates of
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a02decf..d396884 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -248,3 +248,6 @@ cond_syscall(sys_execveat);
 
 /* membarrier */
 cond_syscall(sys_membarrier);
+
+/* restartable sequences */
+cond_syscall(sys_restartable_sequences);
-- 
2.4.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH 2/3] restartable sequences: x86 ABI
  2015-10-22 18:06 [RFC PATCH 0/3] restartable sequences benchmarks Dave Watson
       [not found] ` <cover.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
@ 2015-10-22 18:06 ` Dave Watson
  2015-10-22 18:07 ` [RFC PATCH 3/3] restartable sequences: basic user-space self-tests Dave Watson
  2015-10-22 19:11 ` [RFC PATCH 0/3] restartable sequences benchmarks Andy Lutomirski
  3 siblings, 0 replies; 13+ messages in thread
From: Dave Watson @ 2015-10-22 18:06 UTC (permalink / raw)
  To: davejwatson, kernel-team, linux-kernel, linux-api, pjt,
	mathieu.desnoyers

Implements the x86 (i386 & x86-64) ABIs for interrupting and restarting
execution within restartable sequence sections.

Ptrace is modified to single-step over the entire critical region, since any
trap taken inside the region restarts it and per-instruction stepping could
otherwise never make forward progress.
---
 arch/x86/entry/common.c                      |  3 ++
 arch/x86/entry/syscalls/syscall_64.tbl       |  1 +
 arch/x86/include/asm/restartable_sequences.h | 44 ++++++++++++++++++++++++++
 arch/x86/kernel/Makefile                     |  2 ++
 arch/x86/kernel/ptrace.c                     |  6 ++--
 arch/x86/kernel/restartable_sequences.c      | 47 ++++++++++++++++++++++++++++
 arch/x86/kernel/signal.c                     | 12 ++++++-
 kernel/restartable_sequences.c               | 11 +++++--
 8 files changed, 120 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/include/asm/restartable_sequences.h
 create mode 100644 arch/x86/kernel/restartable_sequences.c

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 80dcc92..e817f04 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -24,6 +24,7 @@
 
 #include <asm/desc.h>
 #include <asm/traps.h>
+#include <asm/restartable_sequences.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -253,6 +254,8 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 		if (cached_flags & _TIF_NOTIFY_RESUME) {
 			clear_thread_flag(TIF_NOTIFY_RESUME);
 			tracehook_notify_resume(regs);
+			if (rseq_active(current))
+				arch_rseq_handle_notify_resume(regs);
 		}
 
 		if (cached_flags & _TIF_USER_RETURN_NOTIFY)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842f..0fd4243 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -331,6 +331,7 @@
 322	64	execveat		stub_execveat
 323	common	userfaultfd		sys_userfaultfd
 324	common	membarrier		sys_membarrier
+325	common	restartable_sequences	sys_restartable_sequences
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/include/asm/restartable_sequences.h b/arch/x86/include/asm/restartable_sequences.h
new file mode 100644
index 0000000..c0bcab2
--- /dev/null
+++ b/arch/x86/include/asm/restartable_sequences.h
@@ -0,0 +1,44 @@
+#ifndef _ASM_X86_RESTARTABLE_SEQUENCES_H
+#define _ASM_X86_RESTARTABLE_SEQUENCES_H
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_RESTARTABLE_SEQUENCES
+
+static inline unsigned long arch_rseq_in_crit_section(struct task_struct *p,
+						struct pt_regs *regs)
+{
+	unsigned long ip = (unsigned long)regs->ip;
+
+	return rseq_lookup(p, ip);
+}
+
+static inline bool arch_rseq_needs_notify_resume(struct task_struct *p)
+{
+#ifdef CONFIG_PREEMPT
+	/*
+	 * Under CONFIG_PREEMPT it's possible for regs to be incoherent in the
+	 * case that we took an interrupt during syscall entry.  Avoid this by
+	 * always deferring to our notify-resume handler.
+	 */
+	return true;
+#else
+	return arch_rseq_in_crit_section(p, task_pt_regs(p));
+#endif
+}
+
+void arch_rseq_handle_notify_resume(struct pt_regs *regs);
+void arch_rseq_check_critical_section(struct task_struct *p,
+				      struct pt_regs *regs);
+
+#else /* !CONFIG_RESTARTABLE_SEQUENCES */
+
+static inline void arch_rseq_handle_notify_resume(struct pt_regs *regs) {}
+static inline void arch_rseq_check_critical_section(struct task_struct *p,
+						    struct pt_regs *regs) {}
+
+#endif
+
+#endif /* _ASM_X86_RESTARTABLE_SEQUENCES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ff..ee98fb6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -110,6 +110,8 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 
+obj-$(CONFIG_RESTARTABLE_SEQUENCES)	+= restartable_sequences.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 558f50e..934aeaf 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1439,6 +1439,8 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 	struct siginfo info;
 
 	fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
-	/* Send us the fake SIGTRAP */
-	force_sig_info(SIGTRAP, &info, tsk);
+	/* Don't single-step into a restartable sequence */
+	if (!rseq_lookup(tsk, (unsigned long)regs->ip))
+		/* Send us the fake SIGTRAP */
+		force_sig_info(SIGTRAP, &info, tsk);
 }
diff --git a/arch/x86/kernel/restartable_sequences.c b/arch/x86/kernel/restartable_sequences.c
new file mode 100644
index 0000000..330568a
--- /dev/null
+++ b/arch/x86/kernel/restartable_sequences.c
@@ -0,0 +1,47 @@
+/*
+ * Restartable Sequences: x86 ABI.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015, Google, Inc.,
+ * Paul Turner <pjt@google.com> and Andrew Hunter <ahh@google.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <asm/restartable_sequences.h>
+
+void arch_rseq_check_critical_section(struct task_struct *p,
+				      struct pt_regs *regs)
+{
+	unsigned long ip = arch_rseq_in_crit_section(p, regs);
+
+	if (!ip)
+		return;
+
+	/* RSEQ only applies to user-mode execution */
+	BUG_ON(!user_mode(regs));
+
+	regs->ip = ip;
+}
+
+void arch_rseq_handle_notify_resume(struct pt_regs *regs)
+{
+	struct restartable_sequence_state *rseq_state = &current->rseq_state;
+
+	/* If this update fails our user-state is incoherent. */
+	if (put_user(task_cpu(current), rseq_state->cpu_pointer))
+		force_sig(SIGSEGV, current);
+
+	arch_rseq_check_critical_section(current, regs);
+}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index da52e6b..1516e5d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -30,6 +30,7 @@
 #include <asm/fpu/signal.h>
 #include <asm/vdso.h>
 #include <asm/mce.h>
+#include <asm/restartable_sequences.h>
 #include <asm/sighandling.h>
 #include <asm/vm86.h>
 
@@ -377,7 +378,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 		 */
 		put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
 	} put_user_catch(err);
-	
+
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				regs, set->sig[0]);
@@ -613,6 +614,15 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	sigset_t *set = sigmask_to_save();
 	compat_sigset_t *cset = (compat_sigset_t *) set;
 
+	/*
+	 * If we are executing in the critical section of a restartable
+	 * sequence we need to fix up the user's stack saved ip at this point
+	 * so that signal handler return does not allow us to jump back into
+	 * the block across a context switch boundary.
+	 */
+	if (rseq_active(current))
+		arch_rseq_check_critical_section(current, regs);
+
 	/* Set up the stack frame */
 	if (is_ia32_frame()) {
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
diff --git a/kernel/restartable_sequences.c b/kernel/restartable_sequences.c
index 72cfa9b..87e63e2 100644
--- a/kernel/restartable_sequences.c
+++ b/kernel/restartable_sequences.c
@@ -20,18 +20,23 @@
 
 #ifdef CONFIG_RESTARTABLE_SEQUENCES
 
+#include <asm/restartable_sequences.h>
 #include <linux/uaccess.h>
 #include <linux/preempt.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
 static void rseq_sched_in_nop(struct preempt_notifier *pn, int cpu) {}
-static void rseq_sched_out_nop(struct preempt_notifier *pn,
-			       struct task_struct *next) {}
+static void rseq_sched_out(struct preempt_notifier *pn,
+			   struct task_struct *next)
+{
+	if (arch_rseq_needs_notify_resume(current))
+		set_thread_flag(TIF_NOTIFY_RESUME);
+}
 
 static __read_mostly struct preempt_ops rseq_preempt_ops = {
 	.sched_in = rseq_sched_in_nop,
-	.sched_out = rseq_sched_out_nop,
+	.sched_out = rseq_sched_out,
 };
 
 unsigned long rseq_lookup(struct task_struct *p, unsigned long ip)
-- 
2.4.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH 3/3] restartable sequences: basic user-space self-tests
  2015-10-22 18:06 [RFC PATCH 0/3] restartable sequences benchmarks Dave Watson
       [not found] ` <cover.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
  2015-10-22 18:06 ` [RFC PATCH 2/3] restartable sequences: x86 ABI Dave Watson
@ 2015-10-22 18:07 ` Dave Watson
  2015-10-22 19:11 ` [RFC PATCH 0/3] restartable sequences benchmarks Andy Lutomirski
  3 siblings, 0 replies; 13+ messages in thread
From: Dave Watson @ 2015-10-22 18:07 UTC (permalink / raw)
  To: davejwatson, kernel-team, linux-kernel, linux-api, pjt,
	mathieu.desnoyers

    Implements basic tests of RSEQ functionality.

    "basic_percpu_ops_test" implements a few simple per-cpu operations and
    testing their correctness.
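
    For orientation, the pattern the tests build on is a short section of
    straight-line code whose begin/end/restart addresses are recorded in a
    __rseq_sections table entry.  A sketch, not code from this patch:
    'counters' is a hypothetical array, and __rseq_current_cpu is the TLS
    cpu cache maintained via rseq.c below:

    static long counters[CPU_SETSIZE];

    static inline void percpu_inc(void)
    {
    	asm volatile (
    		"1:\n\t"
    		"movl %1, %%ecx\n\t"		/* ecx = current cpu */
    		"incq (%0, %%rcx, 8)\n\t"	/* counters[cpu]++ */
    		"2:\n\t"
    		".pushsection __rseq_sections, \"a\"\n\t"
    		".quad 1b, 2b, 1b\n\t"		/* begin, end, restart */
    		".popsection\n\t"
    		:
    		: "r" (counters), "m" (__rseq_current_cpu)
    		: "memory", "rcx");
    }

    If the thread is preempted between reading the cpu and the increment,
    execution resumes at 1b and the cpu is re-read; once the ip reaches 2b
    the increment has committed and no restart occurs.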
---
 tools/testing/selftests/rseq/Makefile              |  14 +
 .../testing/selftests/rseq/basic_percpu_ops_test.c | 331 +++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.c                |  48 +++
 tools/testing/selftests/rseq/rseq.h                |  17 ++
 4 files changed, 410 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/Makefile
 create mode 100644 tools/testing/selftests/rseq/basic_percpu_ops_test.c
 create mode 100644 tools/testing/selftests/rseq/rseq.c
 create mode 100644 tools/testing/selftests/rseq/rseq.h

diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 0000000..3a9cb5c
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,14 @@
+CFLAGS += -Wall
+LDFLAGS += -lpthread
+
+TESTS = basic_percpu_ops_test
+
+all: $(TESTS)
+
+basic_percpu_ops_test: basic_percpu_ops_test.c
+
+%: %.c
+	$(CC) $(CFLAGS) -o $@ $^ rseq.c $(LDFLAGS)
+
+clean:
+	$(RM) $(TESTS)
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 0000000..63a668d
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,331 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "rseq.h"
+
+#if defined(__x86_64__)
+
+#define barrier() __asm__ __volatile__("" : : : "memory")
+
+struct rseq_section {
+	void *begin;
+	void *end;
+	void *restart;
+};
+
+extern struct rseq_section const __start___rseq_sections[]
+__attribute((weak));
+extern struct rseq_section const __stop___rseq_sections[]
+__attribute((weak));
+
+/* Per-cpu lock state used by the lock/cmpxchg operations below. */
+struct percpu_lock {
+	int word[CPU_SETSIZE][16];  /* cache aligned; lock-word is [cpu][0] */
+};
+
+/* A simple percpu spinlock.  Returns the cpu the lock was acquired on. */
+int rseq_percpu_lock(struct percpu_lock *lock)
+{
+	int out = -1;
+
+	asm volatile (
+		"1:\n\t"
+		"movl %1, %0\n\t"
+		"leaq (,%0,8), %%r10\n\t"
+		"leaq (%2, %%r10, 8), %%r10\n\t"
+		"2:\n\t"
+		"cmpl $0, (%%r10)\n\t"
+		"jne 2b\n\t"
+		"movl $1, (%%r10)\n\t"
+		"3:\n\t"
+		".pushsection __rseq_sections, \"a\"\n\t"
+		".quad 1b, 3b, 1b\n\t"
+		".popsection\n\t"
+		: "+r" (out)
+		: "m" (__rseq_current_cpu), "r" ((unsigned long)lock)
+		: "memory", "r10");
+	return out;
+}
+
+/*
+ * cmpxchg [with an additional check value].
+ *
+ * Returns:
+ *  -1 if *p != old or cpu != current cpu [or *check_ptr != check_val],
+ * otherwise 0.
+ *
+ * Note: When specified, check_ptr is dereferenced iff *p == old
+ */
+int rseq_percpu_cmpxchg(int cpu, intptr_t *p, intptr_t old, intptr_t new)
+{
+	asm volatile goto (
+		"1:\n\t"
+		"cmpl %1, %0\n\t"
+		"jne %l[fail]\n\t"
+		"cmpq %2, %3\n\t"
+		"jne %l[fail]\n\t"
+		"movq %4, %3\n\t"
+		"2:\n\t"
+		".pushsection __rseq_sections, \"a\"\n\t"
+		".quad 1b, 2b, 1b\n\t"
+		".popsection\n\t"
+		:
+		: "r" (cpu), "m" (__rseq_current_cpu),
+		  "r" (old), "m" (*p), "r" (new)
+		: "memory"
+		: fail);
+	return 0;
+fail:
+	return -1;
+}
+int rseq_percpu_cmpxchgcheck(int cpu, intptr_t *p, intptr_t old, intptr_t new,
+			intptr_t *check_ptr, intptr_t check_val)
+{
+	asm volatile goto (
+		"1:\n\t"
+		"cmpl %1, %0\n\t"
+		"jne %l[fail]\n\t"
+		"cmpq %2, %3\n\t"
+		"jne %l[fail]\n\t"
+		"cmpq %5, %6\n\t"
+		"jne %l[fail]\n\t"
+		"movq %4, %3\n\t"
+		"2:\n\t"
+		".pushsection __rseq_sections, \"a\"\n\t"
+		".quad 1b, 2b, 1b\n\t"
+		".popsection\n\t"
+		:
+		: "r" (cpu), "m" (__rseq_current_cpu),
+		  "r" (old), "m" (*p), "r" (new),
+		  "r" (check_val), "m" (*check_ptr)
+		: "memory"
+		: fail);
+	return 0;
+fail:
+	return -1;
+}
+
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	barrier();  /* need a release store here; a compiler barrier suffices on x86. */
+	assert(lock->word[cpu][0] == 1);
+	lock->word[cpu][0] = 0;
+}
+
+void rseq_unknown_restart_addr(void *addr)
+{
+	fprintf(stderr, "rseq: unrecognized restart address %p\n", addr);
+	exit(1);
+}
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	int counts[CPU_SETSIZE];
+	int reps;
+};
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_test_data *data = arg;
+	int i, cpu;
+
+	rseq_configure_cpu_pointer();
+	for (i = 0; i < data->reps; i++) {
+		cpu = rseq_percpu_lock(&data->lock);
+		data->counts[cpu]++;
+		rseq_percpu_unlock(&data->lock, cpu);
+	}
+
+	return 0;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu lock.
+ * Obviously real applications might prefer to simply use a per-cpu increment;
+ * however, this is reasonable for a test and the lock can be extended to
+ * synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	int i, sum;
+	pthread_t test_threads[200];
+	struct spinlock_test_data data;
+
+	memset(&data, 0, sizeof(data));
+	data.reps = 5000;
+
+	for (i = 0; i < 200; i++)
+		pthread_create(&test_threads[i], NULL,
+			       test_percpu_spinlock_thread, &data);
+
+	for (i = 0; i < 200; i++)
+		pthread_join(test_threads[i], NULL);
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.counts[i];
+
+	assert(sum == data.reps * 200);
+}
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list {
+	struct percpu_list_node *heads[CPU_SETSIZE];
+};
+
+int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
+{
+	int cpu;
+
+	do {
+		cpu = rseq_current_cpu();
+		node->next = list->heads[cpu];
+	} while (0 != rseq_percpu_cmpxchg(
+			cpu,
+			(intptr_t *)&list->heads[cpu], (intptr_t)node->next,
+			(intptr_t)node));
+
+	return cpu;
+}
+
+struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
+{
+	int cpu;
+	struct percpu_list_node *head, *next;
+
+	do {
+		cpu = rseq_current_cpu();
+		head = list->heads[cpu];
+		/*
+		 * Unlike a traditional lock-less linked list, the availability
+		 * of a cmpxchg-check primitive allows us to implement pop
+		 * without concern over ABA-type races.
+		 */
+		if (!head)
+			return 0;
+		next = head->next;
+	} while (0 != rseq_percpu_cmpxchgcheck(cpu,
+		(intptr_t *)&list->heads[cpu], (intptr_t)head, (intptr_t)next,
+		(intptr_t *)&head->next, (intptr_t)next));
+
+	return head;
+}
+
+
+void *test_percpu_list_thread(void *arg)
+{
+	int i;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	rseq_configure_cpu_pointer();
+	for (i = 0; i < 100000; i++) {
+		struct percpu_list_node *node = percpu_list_pop(list);
+
+		sched_yield();  /* encourage shuffling */
+		if (node)
+			percpu_list_push(list, node);
+	}
+
+	return 0;
+}
+
+/*
+ * Implements a per-cpu linked list then shuffles it via popping and pushing
+ * from many threads.
+ */
+void test_percpu_list(void)
+{
+	int i, j;
+	long sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[200];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.heads[i];
+			list.heads[i] = node;
+		}
+	}
+
+	for (i = 0; i < 200; i++)
+		assert(pthread_create(&test_threads[i], NULL,
+			       test_percpu_list_thread, &list) == 0);
+
+	for (i = 0; i < 200; i++)
+		pthread_join(test_threads[i], NULL);
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		cpu_set_t pin_mask;
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		CPU_ZERO(&pin_mask);
+		CPU_SET(i, &pin_mask);
+		sched_setaffinity(0, sizeof(pin_mask), &pin_mask);
+
+		while ((node = percpu_list_pop(&list))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external actor
+	 * is interfering with our allowed affinity while this test is
+	 * running).
+	 */
+	assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+	const struct rseq_section *iter;
+
+	for (iter = __start___rseq_sections;
+	     iter < __stop___rseq_sections;
+	     iter++) {
+		rseq_configure_region(iter->begin, iter->end, iter->restart);
+		printf("Installing region %p, %p\n", iter->begin, iter->end);
+	}
+	rseq_configure_cpu_pointer();
+
+	test_percpu_spinlock();
+	test_percpu_list();
+
+	return 0;
+}
+
+#else
+int main(int argc, char **argv)
+{
+	fprintf(stderr, "architecture not supported\n");
+	return 0;
+}
+#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 0000000..4dc5059
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,48 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "rseq.h"
+
+__thread volatile const int __rseq_current_cpu = -1;
+
+#define __NR_rseq	325
+#define SYS_RSEQ_SET_CRITICAL		0
+#define SYS_RSEQ_SET_CPU_POINTER	1
+
+int sys_rseq(int op, int flags, void *val1, void *val2, void *val3)
+{
+	return syscall(__NR_rseq, op, flags,
+		(intptr_t)val1, (intptr_t)val2, (intptr_t)val3);
+}
+
+static void sys_rseq_checked(int op, int flags,
+			void *val1, void *val2, void *val3)
+{
+	int rc = sys_rseq(op, flags, val1, val2, val3);
+
+	if (rc) {
+		fprintf(stderr, "sys_rseq(%d, %d, %p, %p, %p) failed(%d): %s\n",
+			op, flags, val1, val2, val3, errno, strerror(errno));
+		exit(1);
+	}
+}
+
+void rseq_configure_region(void *rseq_text_start, void *rseq_text_end,
+			void *rseq_text_restart)
+{
+	sys_rseq_checked(SYS_RSEQ_SET_CRITICAL, 0,
+			rseq_text_start, rseq_text_end, rseq_text_restart);
+}
+
+void rseq_configure_cpu_pointer(void)
+{
+	sys_rseq_checked(SYS_RSEQ_SET_CPU_POINTER, 0,
+			(void *)&__rseq_current_cpu, 0, 0);
+	assert(rseq_current_cpu() != -1); /* always updated prior to return. */
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 0000000..e12db18
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,17 @@
+#ifndef RSEQ_TEST_H
+#define RSEQ_TEST_H
+
+int sys_rseq(int op, int flags, void *val1, void *val2, void *val3);
+/* RSEQ provided thread-local current_cpu */
+
+void rseq_configure_cpu_pointer(void);
+
+void rseq_configure_region(void *rseq_text_start, void *rseq_text_end,
+	void *rseq_text_restart);
+
+extern __thread volatile const int __rseq_current_cpu;
+static inline int rseq_current_cpu(void) { return __rseq_current_cpu; }
+
+void run_tests(void);
+
+#endif
-- 
2.4.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH 1/3] restartable sequences: user-space per-cpu critical sections
       [not found]     ` <581c0b6403285219724961a3d250c6d95dfacea7.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
@ 2015-10-22 18:53       ` kbuild test robot
  2015-10-22 19:35       ` kbuild test robot
  1 sibling, 0 replies; 13+ messages in thread
From: kbuild test robot @ 2015-10-22 18:53 UTC (permalink / raw)
  Cc: kbuild-all-JC7UmRfGjtg, davejwatson-b10kYP2dOMg,
	kernel-team-b10kYP2dOMg, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, pjt-hpIqsD4AKlfQT0dZR+AlfA,
	mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w

Hi Dave,

[auto build test WARNING on v4.3-rc6 -- if it's inappropriate base, please suggest rules for selecting the more suitable base]

url:    https://github.com/0day-ci/linux/commits/Dave-Watson/restartable-sequences-benchmarks/20151023-020939
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
   include/linux/radix-tree.h:212:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
--
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
--
   mm/page_alloc.c:6010:1: sparse: directive in argument list
   mm/page_alloc.c:6012:1: sparse: directive in argument list
   mm/page_alloc.c:6019:1: sparse: directive in argument list
   mm/page_alloc.c:6021:1: sparse: directive in argument list
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
--
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
   include/linux/radix-tree.h:212:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:196:16: sparse: incompatible types in comparison expression (different address spaces)
--
   mm/debug.c:176:1: sparse: directive in argument list
   mm/debug.c:178:1: sparse: directive in argument list
   mm/debug.c:187:1: sparse: directive in argument list
   mm/debug.c:189:1: sparse: directive in argument list
   mm/debug.c:190:1: sparse: directive in argument list
   mm/debug.c:192:1: sparse: directive in argument list
   mm/debug.c:194:1: sparse: directive in argument list
   mm/debug.c:196:1: sparse: directive in argument list
   mm/debug.c:197:1: sparse: directive in argument list
   mm/debug.c:199:1: sparse: directive in argument list
   mm/debug.c:200:1: sparse: directive in argument list
   mm/debug.c:202:1: sparse: directive in argument list
   mm/debug.c:206:1: sparse: directive in argument list
   mm/debug.c:208:1: sparse: directive in argument list
   mm/debug.c:221:1: sparse: directive in argument list
   mm/debug.c:223:1: sparse: directive in argument list
   mm/debug.c:224:1: sparse: directive in argument list
   mm/debug.c:226:1: sparse: directive in argument list
   mm/debug.c:228:1: sparse: directive in argument list
   mm/debug.c:230:1: sparse: directive in argument list
   mm/debug.c:231:1: sparse: directive in argument list
   mm/debug.c:233:1: sparse: directive in argument list
   mm/debug.c:234:1: sparse: directive in argument list
   mm/debug.c:236:1: sparse: directive in argument list
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
--
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
   include/linux/radix-tree.h:212:16: sparse: incompatible types in comparison expression (different address spaces)
   include/linux/radix-tree.h:212:16: sparse: incompatible types in comparison expression (different address spaces)
--
   include/linux/sched.h:1189:34: sparse: Expected ) in function declarator
   include/linux/sched.h:1189:34: sparse: got (
>> builtin:0:0: sparse: expected ; at end of declaration
   include/linux/sched.h:1190:1: sparse: Expected ; at the end of type declaration
   include/linux/sched.h:1190:1: sparse: got }
   mm/memcontrol.c:3289:21: sparse: incompatible types in comparison expression (different address spaces)
   mm/memcontrol.c:3291:21: sparse: incompatible types in comparison expression (different address spaces)
   mm/memcontrol.c:4861:23: sparse: incompatible types in comparison expression (different address spaces)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH 0/3] restartable sequences benchmarks
  2015-10-22 18:06 [RFC PATCH 0/3] restartable sequences benchmarks Dave Watson
                   ` (2 preceding siblings ...)
  2015-10-22 18:07 ` [RFC PATCH 3/3] restartable sequences: basic user-space self-tests Dave Watson
@ 2015-10-22 19:11 ` Andy Lutomirski
       [not found]   ` <CALCETrWXR0BMeEygiLoT5gbyYpGxren_Fch=S4RSryfqwdxH8g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  3 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-10-22 19:11 UTC (permalink / raw)
  To: Dave Watson
  Cc: kernel-team, linux-kernel@vger.kernel.org, Linux API, Paul Turner,
	Mathieu Desnoyers

On Thu, Oct 22, 2015 at 11:06 AM, Dave Watson <davejwatson@fb.com> wrote:
> We've been testing out restartable sequences + malloc changes for use
> at Facebook.  Below are some test results, as well as some possible
> changes based on Paul Turner's original patches

Thanks!  I'll stare at this some time between now and Kernel Summit.

>
> https://lkml.org/lkml/2015/6/24/665
>
> I ran one service with several permutations of various mallocs.  The
> service is CPU-bound, and hits the allocator quite hard.  Requests/s
> are held constant at the source, so we use cpu idle time and latency
> as an indicator of service quality. These are average numbers over
> several hours.  Machines were dual E5-2660, total 16 cores +
> hyperthreading.  This service has ~400 total threads, 70-90 of which
> are doing work at any particular time.
>
>                                    RSS CPUIDLE LATENCYMS
> jemalloc 4.0.0                     31G   33%     390
> jemalloc + this patch              25G   33%     390
> jemalloc + this patch using lsl    25G   30%     420
> jemalloc + PT's rseq patch         25G   32%     405
> glibc malloc 2.20                  27G   30%     420
> tcmalloc gperftools trunk (2.2)    21G   30%     480

Slightly confused.  This is showing a space efficiency improvement but
not a performance improvement?  Is the idea that percpu free lists are
more space efficient than per-thread free lists?

>
> jemalloc rseq patch used for testing:
> https://github.com/djwatson/jemalloc
>
> lsl test - using lsl segment limit to get cpu (i.e. inlined vdso
> getcpu on x86) instead of using the thread caching as in this patch.
> There has been some suggestions to add the thread-cached getcpu()
> feature separately.  It does seem to move the needle in a real service
> by about ~3% to have a thread-cached getcpu vs. not.  I don't think we
> can use restartable sequences in production without a faster getcpu.

If nothing else, I'd like to replace the thread-cached getcpu thing
with percpu gsbase, at least on x86.  That doesn't necessarily have to
be exclusive with restartable sequences.
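
For concreteness, both variants come down to a couple of instructions.  A
sketch, not from these patches: the lsl form assumes the x86_64 vdso
convention (cpu number in the low 12 bits of the per-cpu segment limit,
user selector 0x7b), and the gs form assumes a hypothetical
kernel-maintained cpu field at offset CPU_NUMBER_OFFSET from the user gs
base, which nothing posted so far defines:

static inline int lsl_getcpu(void)
{
	unsigned int limit;

	asm ("lsl %1, %0" : "=r" (limit) : "r" (0x7bu) : "cc");
	return limit & 0xfff;	/* node id lives in the bits above */
}

static inline int gs_getcpu(void)
{
	int cpu;

	/* CPU_NUMBER_OFFSET is a placeholder, not an existing ABI. */
	asm ("movl %%gs:%c1, %0" : "=r" (cpu) : "i" (CPU_NUMBER_OFFSET));
	return cpu;
}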

>
> GS-segment / migration only tests
>
> There's been some interest in seeing if we can do this with only gs
> segment, here's some numbers for those.  This doesn't have to be gs,
> it could just be a migration signal sent to userspace as well, the
> same approaches would apply.
>
> GS patch: https://lkml.org/lkml/2014/9/13/59
>
>                                    RSS CPUIDLE LATENCYMS
> jemalloc 4.0.0                     31G   33%     390
> jemalloc + percpu locking          25G   25%     420
> jemalloc + preempt lock / signal   25G   32%     415

Neat!

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH 1/3] restartable sequences: user-space per-cpu critical sections
       [not found]     ` <581c0b6403285219724961a3d250c6d95dfacea7.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
  2015-10-22 18:53       ` kbuild test robot
@ 2015-10-22 19:35       ` kbuild test robot
  1 sibling, 0 replies; 13+ messages in thread
From: kbuild test robot @ 2015-10-22 19:35 UTC (permalink / raw)
  Cc: kbuild-all-JC7UmRfGjtg, davejwatson-b10kYP2dOMg,
	kernel-team-b10kYP2dOMg, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, pjt-hpIqsD4AKlfQT0dZR+AlfA,
	mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w

[-- Attachment #1: Type: text/plain, Size: 1562 bytes --]

Hi Dave,

[auto build test ERROR on v4.3-rc6 -- if it's inappropriate base, please suggest rules for selecting the more suitable base]

url:    https://github.com/0day-ci/linux/commits/Dave-Watson/restartable-sequences-benchmarks/20151023-020939
config: openrisc-allmodconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=openrisc 

All errors (new ones prefixed by >>):

   In file included from include/asm-generic/unistd.h:1:0,
                    from arch/openrisc/include/uapi/asm/unistd.h:26,
                    from arch/openrisc/kernel/sys_call_table.c:27:
>> include/uapi/asm-generic/unistd.h:717:1: error: 'sys_restartable_sequences' undeclared here (not in a function)

vim +/sys_restartable_sequences +717 include/uapi/asm-generic/unistd.h

   711	__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
   712	#define __NR_userfaultfd 282
   713	__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
   714	#define __NR_membarrier 283
   715	__SYSCALL(__NR_membarrier, sys_membarrier)
   716	#define __NR_restartable_sequences 284
 > 717	__SYSCALL(__NR_restartable_sequences, sys_restartable_sequences)
   718	
   719	#undef __NR_syscalls
   720	#define __NR_syscalls 285

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 35026 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH 0/3] restartable sequences benchmarks
       [not found]   ` <CALCETrWXR0BMeEygiLoT5gbyYpGxren_Fch=S4RSryfqwdxH8g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-10-22 22:10     ` Dave Watson
  0 siblings, 0 replies; 13+ messages in thread
From: Dave Watson @ 2015-10-22 22:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: kernel-team-b10kYP2dOMg,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Linux API,
	Paul Turner, Mathieu Desnoyers

On Thu, Oct 22, 2015 at 12:11:42PM -0700, Andy Lutomirski wrote:
> On Thu, Oct 22, 2015 at 11:06 AM, Dave Watson <davejwatson-b10kYP2dOMg@public.gmane.org> wrote:
> >
> >                                    RSS CPUIDLE LATENCYMS
> > jemalloc 4.0.0                     31G   33%     390
> > jemalloc + this patch              25G   33%     390
> > jemalloc + this patch using lsl    25G   30%     420
> > jemalloc + PT's rseq patch         25G   32%     405
> > glibc malloc 2.20                  27G   30%     420
> > tcmalloc gperftools trunk (2.2)    21G   30%     480
> 
> Slightly confused.  This is showing a space efficiency improvement but
> not a performance improvement?  Is the idea that percpu free lists are
> more space efficient than per-thread free lists?
> 
> --Andy

Correct - the service was already tuned such that most requests hit
the (very large) thread free lists to avoid taking expensive locks
talking to the central arena.  There were more threads than cpus
(~400 threads vs. 32 logical cpus), so the memory win comes simply
from needing fewer free lists.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-10-22 22:10 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-22 18:06 [RFC PATCH 0/3] restartable sequences benchmarks Dave Watson
     [not found] ` <cover.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
2015-10-22 18:06   ` [RFC PATCH 1/3] restartable sequences: user-space per-cpu critical sections Dave Watson
     [not found]     ` <581c0b6403285219724961a3d250c6d95dfacea7.1445464158.git.davejwatson-b10kYP2dOMg@public.gmane.org>
2015-10-22 18:53       ` kbuild test robot
2015-10-22 19:35       ` kbuild test robot
2015-10-22 18:06 ` [RFC PATCH 2/3] restartable sequences: x86 ABI Dave Watson
2015-10-22 18:07 ` [RFC PATCH 3/3] restartable sequences: basic user-space self-tests Dave Watson
2015-10-22 19:11 ` [RFC PATCH 0/3] restartable sequences benchmarks Andy Lutomirski
     [not found]   ` <CALCETrWXR0BMeEygiLoT5gbyYpGxren_Fch=S4RSryfqwdxH8g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-10-22 22:10     ` Dave Watson
  -- strict thread matches above, loose matches on Subject: below --
2015-06-24 22:26 [RFC PATCH 0/3] restartable sequences: fast user-space percpu critical sections Paul Turner
     [not found] ` <20150624222609.6116.86035.stgit-tdHu5vqousHHt/MElyovVYaSKrA+ACpX0E9HWUfgJXw@public.gmane.org>
2015-06-24 22:26   ` [RFC PATCH 2/3] restartable sequences: x86 ABI Paul Turner
     [not found]     ` <20150624222609.6116.30992.stgit-tdHu5vqousHHt/MElyovVYaSKrA+ACpX0E9HWUfgJXw@public.gmane.org>
2015-06-26 18:09       ` Mathieu Desnoyers
     [not found]         ` <1050218158.4054.1435342186284.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-06-26 19:04           ` Mathieu Desnoyers
2015-06-26 19:31           ` Andy Lutomirski
     [not found]             ` <CALCETrWKzP8UPH2OEmwbC4egcAa6NA+VkQD6OuA-LhFv-Aqg6Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-06-27  1:33               ` Paul Turner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).