linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] getcpu_cache system call: caching current CPU number (x86)
@ 2015-07-12 18:06 Mathieu Desnoyers
       [not found] ` <1436724386-30909-1-git-send-email-mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Mathieu Desnoyers @ 2015-07-12 18:06 UTC (permalink / raw)
  To: Paul Turner
  Cc: Mathieu Desnoyers, Andrew Hunter, Peter Zijlstra, Ingo Molnar,
	Ben Maurer, Steven Rostedt, Paul E. McKenney, Josh Triplett,
	Lai Jiangshan, Linus Torvalds, Andrew Morton,
	linux-api-u79uwXL29TY76Z2rM5mHXA

Expose a new system call allowing threads to register a userspace memory
area where to store the current CPU number. Scheduler migration sets the
TIF_NOTIFY_RESUME flag on the current thread. Upon return to user-space,
a notify-resume handler updates the current CPU value within that
user-space memory area.

This getcpu cache is an alternative to the sched_getcpu() vdso which has
a few benefits:
- It is faster to do a memory read that to call a vDSO,
- This cache value can be read from within an inline assembly, which
  makes it a useful building block for restartable sequences.

This approach is inspired by Paul Turner and Andrew Hunter's work
on percpu atomics, which lets the kernel handle restart of critical
sections:
Ref.:
* https://lkml.org/lkml/2015/6/24/665
* https://lwn.net/Articles/650333/
* http://www.linuxplumbersconf.org/2013/ocw/system/presentations/1695/original/LPC%20-%20PerCpu%20Atomics.pdf

Benchmarking sched_getcpu() vs tls cache approach. Getting the
current CPU number:

- With Linux vdso:            12.7 ns
- With TLS-cached cpu number:  0.3 ns

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
CC: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
CC: Andrew Hunter <ahh-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
CC: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
CC: Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
CC: Ben Maurer <bmaurer-b10kYP2dOMg@public.gmane.org>
CC: Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>
CC: "Paul E. McKenney" <paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
CC: Josh Triplett <josh-iaAMLnmF4UmaiuxdJuQwMA@public.gmane.org>
CC: Lai Jiangshan <laijs-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
CC: Linus Torvalds <torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
CC: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
CC: linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
---
 arch/x86/kernel/signal.c          |  2 ++
 arch/x86/syscalls/syscall_64.tbl  |  1 +
 fs/exec.c                         |  1 +
 include/linux/sched.h             | 27 +++++++++++++++
 include/uapi/asm-generic/unistd.h |  4 ++-
 init/Kconfig                      |  9 +++++
 kernel/Makefile                   |  1 +
 kernel/fork.c                     |  2 ++
 kernel/getcpu-cache.c             | 70 +++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c               |  3 ++
 kernel/sched/sched.h              |  2 ++
 kernel/sys_ni.c                   |  3 ++
 12 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 kernel/getcpu-cache.c

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e504246..157cec0 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -750,6 +750,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (getcpu_cache_active(current))
+			getcpu_cache_handle_notify_resume(current);
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fb..cfcf8e7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -329,6 +329,7 @@
 320	common	kexec_file_load		sys_kexec_file_load
 321	common	bpf			sys_bpf
 322	64	execveat		stub_execveat
+323	common	getcpu_cache		sys_getcpu_cache
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/exec.c b/fs/exec.c
index c7f9b73..20ef2e6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1555,6 +1555,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
+	getcpu_cache_execve(current);
 	acct_update_integrals(current);
 	task_numa_free(current);
 	free_bprm(bprm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a419b65..0654cc2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1710,6 +1710,9 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long	task_state_change;
 #endif
+#ifdef CONFIG_GETCPU_CACHE
+	int32_t __user *getcpu_cache;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -3090,4 +3093,28 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+#ifdef CONFIG_GETCPU_CACHE
+void getcpu_cache_fork(struct task_struct *t);
+void getcpu_cache_execve(struct task_struct *t);
+void getcpu_cache_handle_notify_resume(struct task_struct *t);
+static inline bool getcpu_cache_active(struct task_struct *t)
+{
+	return t->getcpu_cache;
+}
+#else
+static inline void getcpu_cache_fork(struct task_struct *t)
+{
+}
+static inline void getcpu_cache_execve(struct task_struct *t)
+{
+}
+static inline void getcpu_cache_handle_notify_resume(struct task_struct *t)
+{
+}
+static inline bool getcpu_cache_active(struct task_struct *t)
+{
+	return false;
+}
+#endif
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index e016bd9..f82b70d 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_getcpu_cache 282
+__SYSCALL(__NR_getcpu_cache, sys_getcpu_cache)
 
 #undef __NR_syscalls
-#define __NR_syscalls 282
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d..fac919b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1559,6 +1559,15 @@ config PCI_QUIRKS
 	  bugs/quirks. Disable this only if your target machine is
 	  unaffected by PCI quirks.
 
+config GETCPU_CACHE
+	bool "Enable getcpu_cache() system call" if EXPERT
+	default y
+	help
+	  Enable the getcpu_cache() system call which provides a
+	  user-space cache for the current CPU number value.
+
+	  If unsure, say Y.
+
 config EMBEDDED
 	bool "Embedded system"
 	option allnoconfig_y
diff --git a/kernel/Makefile b/kernel/Makefile
index 1408b33..3350ba1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_GETCPU_CACHE) += getcpu-cache.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..334e62d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1549,6 +1549,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	cgroup_post_fork(p);
 	if (clone_flags & CLONE_THREAD)
 		threadgroup_change_end(current);
+	if (!(clone_flags & CLONE_THREAD))
+		getcpu_cache_fork(p);
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
diff --git a/kernel/getcpu-cache.c b/kernel/getcpu-cache.c
new file mode 100644
index 0000000..b4e5c77
--- /dev/null
+++ b/kernel/getcpu-cache.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2015 Mathieu Desnoyers <mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
+ *
+ * getcpu_cache system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+
+/*
+ * This resume handler should always be executed between a migration
+ * triggered by preemption and return to user-space.
+ */
+void getcpu_cache_handle_notify_resume(struct task_struct *t)
+{
+	int32_t __user *gcp = t->getcpu_cache;
+
+	if (gcp == NULL)
+		return;
+	if (unlikely(t->flags & PF_EXITING))
+		return;
+	/*
+	 * access_ok() of gcp_user has already been checked by
+	 * sys_getcpu_cache().
+	 */
+	if (__put_user(raw_smp_processor_id(), gcp))
+		force_sig(SIGSEGV, current);
+}
+
+/*
+ * If parent process has a getcpu_cache, the child inherits. Only
+ * applies when forking a process, not a thread.
+ */
+void getcpu_cache_fork(struct task_struct *t)
+{
+	t->getcpu_cache = current->getcpu_cache;
+}
+
+void getcpu_cache_execve(struct task_struct *t)
+{
+	t->getcpu_cache = NULL;
+}
+
+/*
+ * sys_getcpu_cache - setup getcpu cache for caller thread
+ */
+SYSCALL_DEFINE2(getcpu_cache, int32_t __user *, gcp, int, flags)
+{
+	if (flags)
+		return -EINVAL;
+	if (gcp != NULL && !access_ok(VERIFY_WRITE, gcp, sizeof(int32_t)))
+		return -EFAULT;
+	current->getcpu_cache = gcp;
+	/* Will update *gcp on resume */
+	if (gcp)
+		set_thread_flag(TIF_NOTIFY_RESUME);
+	return 0;
+}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62671f5..a9009d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1823,6 +1823,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	p->numa_group = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_GETCPU_CACHE
+	p->getcpu_cache = NULL;
+#endif
 }
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dc0f435..bf3e346 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -921,6 +921,8 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
 	set_task_rq(p, cpu);
 #ifdef CONFIG_SMP
+	if (getcpu_cache_active(p))
+		set_tsk_thread_flag(p, TIF_NOTIFY_RESUME);
 	/*
 	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 	 * successfuly executed on another CPU. We must ensure that updates of
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0a..3691dc8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -229,3 +229,6 @@ cond_syscall(sys_bpf);
 
 /* execveat */
 cond_syscall(sys_execveat);
+
+/* current CPU number cache */
+cond_syscall(sys_getcpu_cache);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2015-07-22  7:53 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-12 18:06 [RFC PATCH] getcpu_cache system call: caching current CPU number (x86) Mathieu Desnoyers
     [not found] ` <1436724386-30909-1-git-send-email-mathieu.desnoyers-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-12 18:47   ` Josh Triplett
2015-07-13  3:40     ` Andy Lutomirski
2015-07-13 15:09     ` Mathieu Desnoyers
2015-07-13  3:38   ` Andy Lutomirski
     [not found]     ` <CALCETrV1suAbvMgD1jOEFyn3JcDE_hhi6X7+sGs9e3Oqw_6jUw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-13 15:27       ` Mathieu Desnoyers
     [not found]         ` <1050138282.1065.1436801252018.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-13 15:30           ` Andrew Hunter
     [not found]             ` <CADroS=7MnUULrjDeQtmscxjkpjCtti9V-HfFXU0sjKhi6PsaAg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-13 16:07               ` Mathieu Desnoyers
2015-07-13 18:36           ` Andy Lutomirski
2015-07-13 11:17   ` Ben Maurer
     [not found]     ` <5CDDBDF2D36D9F43B9F5E99003F6A0D48D5F39C6-f8hGUhss0nh9TZdEUguypQ2O0Ztt9esIQQ4Iyu8u01E@public.gmane.org>
2015-07-13 17:36       ` Mathieu Desnoyers
2015-07-14  9:34         ` Ben Maurer
     [not found]           ` <5CDDBDF2D36D9F43B9F5E99003F6A0D48D5F5DA0-f8hGUhss0nh9TZdEUguypQ2O0Ztt9esIQQ4Iyu8u01E@public.gmane.org>
2015-07-16 18:08             ` Mathieu Desnoyers
     [not found]               ` <549319255.383.1437070088597.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-16 19:27                 ` Andy Lutomirski
2015-07-17 10:21                   ` Ondřej Bílka
2015-07-17 15:53                     ` Andy Lutomirski
     [not found]                   ` <CALCETrWEKE=mow3vVh7C4r8CuGy_d5VOEz7KkpijuR5cpBfFtg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-17 18:48                     ` Linus Torvalds
     [not found]                       ` <CA+55aFz-VBnEKh0SPKgu8xV5=Zb+=6odybVUDoOYOknshbcFJA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-17 18:55                         ` Andy Lutomirski
     [not found]                           ` <CALCETrVNcLpZVATHOs-gZR9AMUSW_ScvXW_0oY=OnFHXXHLdaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-17 19:11                             ` Linus Torvalds
2015-07-17 23:28                       ` Ondřej Bílka
2015-07-17 23:33                         ` Andy Lutomirski
2015-07-18 10:35                           ` Ondřej Bílka
     [not found]                           ` <CALCETrVY=kjeA_4pazy3BL+ekfcV6WHKw8e3z-LBxx_uP1bw2Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-20  8:35                             ` Florian Weimer
     [not found]                               ` <55ACB2DC.5010503-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-07-20 15:31                                 ` Andy Lutomirski
     [not found]                                   ` <CALCETrV9Vp5UUOb3e_R5tphyE-urBgTwQR2pFWUOOFnHqWXHKQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-20 15:32                                     ` Florian Weimer
     [not found]                                       ` <55AD14A4.6030101-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-07-20 17:41                                         ` Andy Lutomirski
     [not found]                                           ` <CALCETrUx6wFxmz+9TyW5bNgaMN0q180G8y9YOyq_D41sdhFaRQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-20 20:07                                             ` josh-iaAMLnmF4UmaiuxdJuQwMA
2015-07-21  7:55                                               ` Florian Weimer
     [not found]                                           ` <CA+55aFzMJkzydXb7uVv1iSUnp=539d43ghQaonGdzMoF7QLZBA@mail.gmail.com>
     [not found]                                             ` <CA+55aFzMJkzydXb7uVv1iSUnp=539d43ghQaonGdzMoF7QLZBA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-20 21:09                                               ` Andy Lutomirski
2015-07-20 22:39                                                 ` Linus Torvalds
     [not found]                                                   ` <CA+55aFwLZLeeN7UN82dyt=emQcNBc8qZPJAw5iqtAbBwFA7FPQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-21  0:25                                                     ` Mathieu Desnoyers
     [not found]                                                       ` <2010227315.699.1437438300542.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-21  7:30                                                         ` Ondřej Bílka
2015-07-21 12:58                                                           ` Mathieu Desnoyers
     [not found]                                                             ` <894137397.137.1437483493715.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-21 15:16                                                               ` Ondřej Bílka
2015-07-21 17:45                                                                 ` Mathieu Desnoyers
     [not found]                                                                   ` <1350114812.1035.1437500726799.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-21 18:00                                                                     ` Ondřej Bílka
2015-07-21 18:18                                                                       ` Mathieu Desnoyers
     [not found]                                                                         ` <2028561497.1088.1437502683664.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-22  7:53                                                                           ` Ondřej Bílka
2015-07-21  8:01                                                         ` Florian Weimer
2015-07-20 13:18                             ` Konstantin Khlebnikov
2015-07-18  7:34                         ` Rich Felker
     [not found]                           ` <20150718073433.GH1173-C3MtFaGISjmo6RMmaWD+6Sb1p8zYI1N1@public.gmane.org>
2015-07-18 10:51                             ` Ondřej Bílka
2015-07-18  9:47               ` Florian Weimer
     [not found]         ` <587954201.31.1436808992876.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-17 10:58           ` Ondřej Bílka
2015-07-17 16:03             ` Mathieu Desnoyers
     [not found]               ` <626545401.1010.1437149010438.JavaMail.zimbra-vg+e7yoeK/dWk0Htik3J/w@public.gmane.org>
2015-07-17 22:43                 ` Ondřej Bílka
2015-07-18  2:43                   ` Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).