From: clameter@sgi.com
From: Christoph Lameter <clameter@sgi.com>
To: ak@suse.de
Cc: akpm@linux-foundation.org
Cc: travis@sgi.com
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: linux-kernel@vger.kernel.org
Subject: [rfc 37/45] x86_64: Support for fast per cpu operations
Date: Mon, 19 Nov 2007 17:12:09 -0800 [thread overview]
Message-ID: <20071120011340.569486037@sgi.com> (raw)
In-Reply-To: 20071120011132.143632442@sgi.com
[-- Attachment #1: cpu_ops_x86 --]
[-- Type: text/plain, Size: 7261 bytes --]
Support fast cpu ops in x86_64 by providing a series of functions that
generate the proper instructions. Define CONFIG_FAST_CPU_OPS so that core code
can exploit the availability of fast per cpu operations.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
arch/x86/Kconfig | 4
include/asm-x86/percpu_64.h | 262 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 266 insertions(+)
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig 2007-11-19 16:16:03.458140098 -0800
+++ linux-2.6/arch/x86/Kconfig 2007-11-19 16:17:17.473389874 -0800
@@ -137,6 +137,10 @@ config GENERIC_PENDING_IRQ
depends on GENERIC_HARDIRQS && SMP
default y
+config FAST_CPU_OPS
+ bool
+ default y
+
config X86_SMP
bool
depends on X86_32 && SMP && !X86_VOYAGER
Index: linux-2.6/include/asm-x86/percpu_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu_64.h 2007-11-19 16:17:16.953139798 -0800
+++ linux-2.6/include/asm-x86/percpu_64.h 2007-11-19 16:17:17.473389874 -0800
@@ -71,4 +71,266 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+#define __xp(x) ((volatile unsigned long *)(x))
+
+static inline unsigned long __cpu_read_gs(volatile void *ptr, int size)
+{
+ unsigned long result;
+ switch (size) {
+ case 1:
+ __asm__ ("mov %%gs:%1, %b0"
+ : "=r"(result)
+ : "m"(*__xp(ptr)));
+ return result;
+ case 2:
+ __asm__ ("movw %%gs:%1, %w0"
+ : "=r"(result)
+ : "m"(*__xp(ptr)));
+ return result;
+ case 4:
+ __asm__ ("movl %%gs:%1, %k0"
+ : "=r"(result)
+ : "m"(*__xp(ptr)));
+ return result;
+ case 8:
+ __asm__ ("movq %%gs:%1, %0"
+ : "=r"(result)
+ : "m"(*__xp(ptr)));
+ return result;
+ }
+ BUG();
+}
+
+#define cpu_read_gs(obj)\
+ ((__typeof__(obj))__cpu_read_gs(&(obj), sizeof(obj)))
+
+static inline void __cpu_write_gs(volatile void *ptr,
+ unsigned long data, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("mov %b0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("movw %w0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("movl %k0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("movq %0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_write_gs(obj, value)\
+ __cpu_write_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_add_gs(volatile void *ptr,
+ long data, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("add %b0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("addw %w0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("addl %k0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("addq %0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_add_gs(obj, value)\
+ __cpu_add_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_sub_gs(volatile void *ptr,
+ long data, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("sub %b0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("subw %w0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("subl %k0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("subq %0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_sub_gs(obj, value)\
+ __cpu_sub_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_xchg_gs(volatile void *ptr,
+ long data, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("xchg %b0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("xchgw %w0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("xchgl %k0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("xchgq %0, %%gs:%1"
+ : : "ri"(data), "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_xchg_gs(obj, value)\
+ __cpu_xchg_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_inc_gs(volatile void *ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("incb %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("incw %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("incl %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("incq %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_inc_gs(obj)\
+ __cpu_inc_gs(&(obj), sizeof(obj))
+
+static inline void __cpu_dec_gs(volatile void *ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ ("decb %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 2:
+ __asm__ ("decw %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 4:
+ __asm__ ("decl %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ case 8:
+ __asm__ ("decq %%gs:%0"
+ : : "m"(*__xp(ptr)));
+ return;
+ }
+ BUG();
+}
+
+#define cpu_dec_gs(obj)\
+ __cpu_dec_gs(&(obj), sizeof(obj))
+
+static inline unsigned long __cmpxchg_local_gs(volatile void *ptr,
+ unsigned long old, unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ ("cmpxchgb %b1, %%gs:%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xp(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ ("cmpxchgw %w1, %%gs:%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xp(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ ("cmpxchgl %k1, %%gs:%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xp(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ ("cmpxchgq %1, %%gs:%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xp(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
+#define cmpxchg_local_gs(obj, o, n)\
+ ((__typeof__(obj))__cmpxchg_local_gs(&(obj),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(obj)))
+
+/*
+ * Map the generic CPU_xx operations used by core code onto the
+ * gs-segment primitives above.
+ */
+#define CPU_READ(obj) cpu_read_gs(obj)
+#define CPU_WRITE(obj,val) cpu_write_gs(obj, val)
+#define CPU_ADD(obj,val) cpu_add_gs(obj, val)
+#define CPU_SUB(obj,val) cpu_sub_gs(obj, val)
+#define CPU_INC(obj) cpu_inc_gs(obj)
+#define CPU_DEC(obj) cpu_dec_gs(obj)
+
+#define CPU_XCHG(obj,val) cpu_xchg_gs(obj, val)
+#define CPU_CMPXCHG(obj, old, new) cmpxchg_local_gs(obj, old, new)
+
+/*
+ * All cpu operations are interrupt safe and do not need to disable
+ * preempt. So the other variants all reduce to the same instruction.
+ */
+#define _CPU_READ CPU_READ
+#define _CPU_WRITE CPU_WRITE
+#define _CPU_ADD CPU_ADD
+#define _CPU_SUB CPU_SUB
+#define _CPU_INC CPU_INC
+#define _CPU_DEC CPU_DEC
+#define _CPU_XCHG CPU_XCHG
+#define _CPU_CMPXCHG CPU_CMPXCHG
+
+/* Double-underscore variants: same single-instruction ops (see above) */
+#define __CPU_READ CPU_READ
+#define __CPU_WRITE CPU_WRITE
+#define __CPU_ADD CPU_ADD
+#define __CPU_SUB CPU_SUB
+#define __CPU_INC CPU_INC
+#define __CPU_DEC CPU_DEC
+#define __CPU_XCHG CPU_XCHG
+#define __CPU_CMPXCHG CPU_CMPXCHG
+
#endif /* _ASM_X8664_PERCPU_H_ */
--
next prev parent reply other threads:[~2007-11-20 1:25 UTC|newest]
Thread overview: 120+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-11-20 1:11 [rfc 00/45] [RFC] CPU ops and a rework of per cpu data handling on x86_64 clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 01/45] ACPI: Avoid references to impossible processors clameter, Christoph Lameter
2007-11-20 12:47 ` Mathieu Desnoyers
2007-11-20 20:16 ` Christoph Lameter
2007-11-20 15:29 ` Andi Kleen
2007-11-20 20:18 ` Christoph Lameter
2007-11-20 1:11 ` [rfc 02/45] cpu alloc: Simple version of the allocator (static allocations) clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 03/45] Generic CPU operations: Core piece clameter, Christoph Lameter
2007-11-20 3:17 ` Mathieu Desnoyers
2007-11-20 3:30 ` Christoph Lameter
2007-11-20 4:07 ` Mathieu Desnoyers
2007-11-20 20:36 ` Christoph Lameter
2007-11-20 1:11 ` [rfc 04/45] cpu alloc: Use in SLUB clameter, Christoph Lameter
2007-11-20 12:42 ` Mathieu Desnoyers
2007-11-20 20:44 ` Christoph Lameter
2007-11-20 21:23 ` Mathieu Desnoyers
2007-11-20 21:36 ` Christoph Lameter
2007-11-20 21:43 ` Mathieu Desnoyers
2007-11-20 1:11 ` [rfc 05/45] cpu alloc: Remove SLUB fields clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 06/45] cpu alloc: page allocator conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 07/45] cpu_alloc: Implement dynamically extendable cpu areas clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 08/45] cpu alloc: x86 support clameter, Christoph Lameter
2007-11-20 1:35 ` H. Peter Anvin
2007-11-20 2:02 ` Christoph Lameter
2007-11-20 2:18 ` H. Peter Anvin
2007-11-20 3:37 ` Nick Piggin
2007-11-20 3:59 ` Nick Piggin
2007-11-20 12:05 ` Andi Kleen
2007-11-20 3:16 ` Andi Kleen
2007-11-20 3:50 ` Christoph Lameter
2007-11-20 12:01 ` Andi Kleen
2007-11-20 20:35 ` Christoph Lameter
2007-11-20 20:59 ` Andi Kleen
2007-11-20 21:33 ` Christoph Lameter
2007-11-21 0:10 ` Christoph Lameter
2007-11-21 1:16 ` Christoph Lameter
2007-11-21 1:36 ` Andi Kleen
2007-11-21 2:08 ` Christoph Lameter
2007-11-21 13:08 ` Andi Kleen
2007-11-21 19:01 ` Christoph Lameter
2007-11-20 20:43 ` H. Peter Anvin
2007-11-20 20:51 ` Andi Kleen
2007-11-20 20:58 ` Christoph Lameter
2007-11-20 21:06 ` H. Peter Anvin
2007-11-20 21:34 ` Christoph Lameter
2007-11-20 21:01 ` H. Peter Anvin
2007-11-27 4:12 ` John Richard Moser
2007-11-20 1:11 ` [rfc 09/45] cpu alloc: IA64 support clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 10/45] cpu_alloc: Sparc64 support clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 11/45] cpu alloc: percpu_counter conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 12/45] cpu alloc: crash_notes conversion clameter, Christoph Lameter
2007-11-20 13:03 ` Mathieu Desnoyers
2007-11-20 20:50 ` Christoph Lameter
2007-11-20 1:11 ` [rfc 13/45] cpu alloc: workqueue conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 14/45] cpu alloc: ACPI cstate handling conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 15/45] cpu alloc: genhd statistics conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 16/45] cpu alloc: blktrace conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 17/45] cpu alloc: SRCU clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 18/45] cpu alloc: XFS counters clameter, Christoph Lameter
2007-11-20 8:12 ` Christoph Hellwig
2007-11-20 20:38 ` Christoph Lameter
2007-11-21 4:47 ` David Chinner
2007-11-21 4:50 ` Christoph Lameter
2007-11-20 1:11 ` [rfc 19/45] cpu alloc: NFS statistics clameter, Christoph Lameter
2007-11-20 13:02 ` Mathieu Desnoyers
2007-11-20 20:49 ` Christoph Lameter
2007-11-20 20:56 ` Trond Myklebust
2007-11-20 21:28 ` Mathieu Desnoyers
2007-11-20 21:48 ` Trond Myklebust
2007-11-20 21:50 ` Mathieu Desnoyers
2007-11-20 22:46 ` Trond Myklebust
2007-11-21 0:53 ` Mathieu Desnoyers
2007-11-20 21:26 ` Mathieu Desnoyers
2007-11-20 1:11 ` [rfc 20/45] cpu alloc: neigbour statistics clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 21/45] cpu alloc: tcp statistics clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 22/45] cpu alloc: convert scatches clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 23/45] cpu alloc: dmaengine conversion clameter, Christoph Lameter
2007-11-20 12:50 ` Mathieu Desnoyers
2007-11-20 20:46 ` Christoph Lameter
2007-11-20 1:11 ` [rfc 24/45] cpu alloc: convert loopback statistics clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 25/45] cpu alloc: veth conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 26/45] cpu alloc: Chelsio statistics conversion clameter, Christoph Lameter
2007-11-20 1:11 ` [rfc 27/45] cpu alloc: convert mib handling to cpu alloc clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 28/45] cpu_alloc: convert network sockets clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 29/45] cpu alloc: Use for infiniband clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 30/45] cpu alloc: Use in the crypto subsystem clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 31/45] cpu alloc: Remove the allocpercpu functionality clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 32/45] Module handling: Use CPU_xx ops to dynamically allocate counters clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 33/45] x86_64: Use CPU ops for nmi alert counter clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 34/45] x86_64: Fold percpu area into the cpu area clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 35/45] X86_64: Declare pda as per cpu data thereby moving it " clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 36/45] X86_64: Place pda first in " clameter, Christoph Lameter
2007-11-20 1:12 ` clameter, Christoph Lameter [this message]
2007-11-20 2:00 ` [rfc 37/45] x86_64: Support for fast per cpu operations H. Peter Anvin
2007-11-20 2:03 ` Christoph Lameter
2007-11-20 2:15 ` H. Peter Anvin
2007-11-20 2:17 ` David Miller
2007-11-20 2:19 ` H. Peter Anvin
2007-11-20 3:23 ` Andi Kleen
2007-11-20 2:45 ` Paul Mackerras
2007-11-20 1:12 ` [rfc 38/45] x86_64: Remove obsolete per_cpu offset calculations clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 39/45] x86_64: Remove the data_offset field from the pda clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 40/45] x86_64: Provide per_cpu_var definition clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 41/45] VM statistics: Use CPU ops clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 43/45] x86_64: Add a CPU_OR to support or_pda() clameter, Christoph Lameter
2007-11-20 1:12 ` [rfc 44/45] Remove local_t support clameter, Christoph Lameter
2007-11-20 12:59 ` Mathieu Desnoyers
2007-11-20 20:48 ` Christoph Lameter
2007-11-20 1:12 ` [rfc 45/45] Modules: Hack to handle symbols that have a zero value clameter, Christoph Lameter
2007-11-20 2:20 ` Mathieu Desnoyers
2007-11-20 2:49 ` Christoph Lameter
2007-11-20 3:29 ` Mathieu Desnoyers
2007-11-20 1:18 ` [rfc 00/45] [RFC] CPU ops and a rework of per cpu data handling on x86_64 Christoph Lameter
2007-11-20 1:51 ` David Miller
2007-11-20 1:59 ` Christoph Lameter
2007-11-20 2:10 ` David Miller
2007-11-20 2:12 ` Christoph Lameter
2007-11-20 3:25 ` Andi Kleen
2007-11-20 3:33 ` Christoph Lameter
2007-11-20 4:04 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071120011340.569486037@sgi.com \
--to=clameter@sgi.com \
--cc=ak@suse.de \
--cc=akpm@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox