All of lore.kernel.org
 help / color / mirror / Atom feed
From: clameter@sgi.com
From: Christoph Lameter <clameter@sgi.com>
To: ak@suse.de
Cc: akpm@linux-foundation.org
Cc: travis@sgi.com
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: linux-kernel@vger.kernel.org
Subject: [rfc 37/45] x86_64: Support for fast per cpu operations
Date: Mon, 19 Nov 2007 17:12:09 -0800	[thread overview]
Message-ID: <20071120011340.569486037@sgi.com> (raw)
In-Reply-To: 20071120011132.143632442@sgi.com

[-- Attachment #1: cpu_ops_x86 --]
[-- Type: text/plain, Size: 7261 bytes --]

Support fast cpu ops on x86_64 by providing a set of inline functions that
emit one %gs-relative instruction per operation. Define CONFIG_FAST_CPU_OPS
so that core code can exploit the availability of fast per cpu operations.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 arch/x86/Kconfig            |    4 
 include/asm-x86/percpu_64.h |  262 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 266 insertions(+)

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig	2007-11-19 16:16:03.458140098 -0800
+++ linux-2.6/arch/x86/Kconfig	2007-11-19 16:17:17.473389874 -0800
@@ -137,6 +137,10 @@ config GENERIC_PENDING_IRQ
 	depends on GENERIC_HARDIRQS && SMP
 	default y
 
+config FAST_CPU_OPS
+	bool
+	default y
+
 config X86_SMP
 	bool
 	depends on X86_32 && SMP && !X86_VOYAGER
Index: linux-2.6/include/asm-x86/percpu_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu_64.h	2007-11-19 16:17:16.953139798 -0800
+++ linux-2.6/include/asm-x86/percpu_64.h	2007-11-19 16:17:17.473389874 -0800
@@ -71,4 +71,266 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
 #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
 
 
+#define __xp(x) ((volatile unsigned long *)(x))
+
+/*
+ * Load the 1, 2, 4 or 8 byte object at %gs:ptr, zero extended. The asm is
+ * volatile so the compiler can neither cache nor merge per cpu loads.
+ */
+static inline unsigned long __cpu_read_gs(volatile void *ptr, int size)
+{
+	unsigned long result;
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("movzbl %%gs:%1, %k0"
+				     : "=r"(result) : "m"(*__xp(ptr)));
+		return result;
+	case 2:
+		__asm__ __volatile__ ("movzwl %%gs:%1, %k0"
+				     : "=r"(result) : "m"(*__xp(ptr)));
+		return result;
+	case 4:
+		__asm__ __volatile__ ("movl %%gs:%1, %k0"
+				     : "=r"(result) : "m"(*__xp(ptr)));
+		return result;
+	case 8:
+		__asm__ __volatile__ ("movq %%gs:%1, %0"
+				     : "=r"(result) : "m"(*__xp(ptr)));
+		return result;
+	}
+	BUG();
+}
+
+#define cpu_read_gs(obj)\
+	((__typeof__(obj))__cpu_read_gs(&(obj), sizeof(obj)))
+/* Store the low 1, 2, 4 or 8 bytes of data to the object at %gs:ptr. */
+static inline void __cpu_write_gs(volatile void *ptr,
+				unsigned long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("movb %b1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "qi"((signed char)data));
+		return;
+	case 2:
+		__asm__ __volatile__ ("movw %w1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((short)data));
+		return;
+	case 4:
+		__asm__ __volatile__ ("movl %k1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((int)data));
+		return;
+	case 8:
+		__asm__ __volatile__ ("movq %1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "re"(data));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_write_gs(obj, value)\
+	__cpu_write_gs(&(obj), (unsigned long)(value), sizeof(obj))
+/* Add data to the 1, 2, 4 or 8 byte object at %gs:ptr in one instruction. */
+static inline void __cpu_add_gs(volatile void *ptr,
+				long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("addb %b1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "qi"((signed char)data));
+		return;
+	case 2:
+		__asm__ __volatile__ ("addw %w1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((short)data));
+		return;
+	case 4:
+		__asm__ __volatile__ ("addl %k1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((int)data));
+		return;
+	case 8:
+		__asm__ __volatile__ ("addq %1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "re"(data));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_add_gs(obj, value)\
+	__cpu_add_gs(&(obj), (unsigned long)(value), sizeof(obj))
+/* Subtract data from the 1, 2, 4 or 8 byte object at %gs:ptr. */
+static inline void __cpu_sub_gs(volatile void *ptr,
+				long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("subb %b1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "qi"((signed char)data));
+		return;
+	case 2:
+		__asm__ __volatile__ ("subw %w1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((short)data));
+		return;
+	case 4:
+		__asm__ __volatile__ ("subl %k1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "ri"((int)data));
+		return;
+	case 8:
+		__asm__ __volatile__ ("subq %1, %%gs:%0"
+			: "+m"(*__xp(ptr)) : "re"(data));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_sub_gs(obj, value)\
+	__cpu_sub_gs(&(obj), (unsigned long)(value), sizeof(obj))
+/* Swap data with the object at %gs:ptr and return its previous contents. */
+static inline unsigned long __cpu_xchg_gs(volatile void *ptr,
+				long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("xchgb %b0, %%gs:%1"
+			: "+q"(data), "+m"(*__xp(ptr)));
+		return (unsigned char)data;
+	case 2:
+		__asm__ __volatile__ ("xchgw %w0, %%gs:%1"
+			: "+r"(data), "+m"(*__xp(ptr)));
+		return (unsigned short)data;
+	case 4:
+		__asm__ __volatile__ ("xchgl %k0, %%gs:%1"
+			: "+r"(data), "+m"(*__xp(ptr)));
+		return (unsigned int)data;
+	case 8:
+		__asm__ __volatile__ ("xchgq %0, %%gs:%1"
+			: "+r"(data), "+m"(*__xp(ptr)));
+		return data;
+	}
+	BUG();
+}
+
+#define cpu_xchg_gs(obj, value)\
+	((__typeof__(obj))__cpu_xchg_gs(&(obj), (long)(value), sizeof(obj)))
+
+/*
+ * Increment the 1, 2, 4 or 8 byte object at %gs:ptr. The object is an
+ * in/out operand so the compiler knows that its contents change.
+ */
+static inline void __cpu_inc_gs(volatile void *ptr, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("incb %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ __volatile__ ("incw %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ __volatile__ ("incl %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ __volatile__ ("incq %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_inc_gs(obj)\
+	__cpu_inc_gs(&(obj), sizeof(obj))
+
+/*
+ * Decrement the 1, 2, 4 or 8 byte object at %gs:ptr. The object is an
+ * in/out operand so the compiler knows that its contents change.
+ */
+static inline void __cpu_dec_gs(volatile void *ptr, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("decb %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ __volatile__ ("decw %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ __volatile__ ("decl %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ __volatile__ ("decq %%gs:%0" : "+m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_dec_gs(obj)\
+	__cpu_dec_gs(&(obj), sizeof(obj))
+
+/*
+ * If the object at %gs:ptr equals old, replace it with new. The previous
+ * contents are returned in the low size bytes; a bad size returns old.
+ */
+static inline unsigned long __cmpxchg_local_gs(volatile void *ptr,
+			unsigned long old, unsigned long new, int size)
+{
+	unsigned long prev;
+	switch (size) {
+	case 1:
+		__asm__ __volatile__ ("cmpxchgb %b2, %%gs:%1"
+				     : "=a"(prev), "+m"(*__xp(ptr))
+				     : "q"(new), "0"(old) : "memory");
+		return prev;
+	case 2:
+		__asm__ __volatile__ ("cmpxchgw %w2, %%gs:%1"
+				     : "=a"(prev), "+m"(*__xp(ptr))
+				     : "r"(new), "0"(old) : "memory");
+		return prev;
+	case 4:
+		__asm__ __volatile__ ("cmpxchgl %k2, %%gs:%1"
+				     : "=a"(prev), "+m"(*__xp(ptr))
+				     : "r"(new), "0"(old) : "memory");
+		return prev;
+	case 8:
+		__asm__ __volatile__ ("cmpxchgq %2, %%gs:%1"
+				     : "=a"(prev), "+m"(*__xp(ptr))
+				     : "r"(new), "0"(old) : "memory");
+		return prev;
+	}
+	return old;
+}
+
+#define cmpxchg_local_gs(obj, o, n)\
+	((__typeof__(obj))__cmpxchg_local_gs(&(obj),(unsigned long)(o),\
+					(unsigned long)(n),sizeof(obj)))
+/* CPU_xx operations, implemented by the %gs based helpers in this file. */
+#define CPU_READ(obj)		cpu_read_gs(obj)
+#define CPU_WRITE(obj,val)	cpu_write_gs(obj, val)
+#define CPU_ADD(obj,val)	cpu_add_gs(obj, val)
+#define CPU_SUB(obj,val)	cpu_sub_gs(obj, val)
+#define CPU_INC(obj)		cpu_inc_gs(obj)
+#define CPU_DEC(obj)		cpu_dec_gs(obj)
+
+#define CPU_XCHG(obj,val)	cpu_xchg_gs(obj, val)
+#define CPU_CMPXCHG(obj, old, new) cmpxchg_local_gs(obj, old, new)
+
+/*
+ * Each operation is a single instruction, so it is interrupt safe and needs
+ * no preempt disable. The _CPU_xx and __CPU_xx variants are plain aliases.
+ */
+#define _CPU_READ CPU_READ
+#define _CPU_WRITE CPU_WRITE
+#define _CPU_ADD CPU_ADD
+#define _CPU_SUB CPU_SUB
+#define _CPU_INC CPU_INC
+#define _CPU_DEC CPU_DEC
+#define _CPU_XCHG CPU_XCHG
+#define _CPU_CMPXCHG CPU_CMPXCHG
+
+#define __CPU_READ CPU_READ
+#define __CPU_WRITE CPU_WRITE
+#define __CPU_ADD CPU_ADD
+#define __CPU_SUB CPU_SUB
+#define __CPU_INC CPU_INC
+#define __CPU_DEC CPU_DEC
+#define __CPU_XCHG CPU_XCHG
+#define __CPU_CMPXCHG CPU_CMPXCHG
+
 #endif /* _ASM_X8664_PERCPU_H_ */

-- 

  parent reply	other threads:[~2007-11-20  1:25 UTC|newest]

Thread overview: 120+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-20  1:11 [rfc 00/45] [RFC] CPU ops and a rework of per cpu data handling on x86_64 clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 01/45] ACPI: Avoid references to impossible processors clameter, Christoph Lameter
2007-11-20 12:47   ` Mathieu Desnoyers
2007-11-20 20:16     ` Christoph Lameter
2007-11-20 15:29   ` Andi Kleen
2007-11-20 20:18     ` Christoph Lameter
2007-11-20  1:11 ` [rfc 02/45] cpu alloc: Simple version of the allocator (static allocations) clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 03/45] Generic CPU operations: Core piece clameter, Christoph Lameter
2007-11-20  3:17   ` Mathieu Desnoyers
2007-11-20  3:30     ` Christoph Lameter
2007-11-20  4:07       ` Mathieu Desnoyers
2007-11-20 20:36         ` Christoph Lameter
2007-11-20  1:11 ` [rfc 04/45] cpu alloc: Use in SLUB clameter, Christoph Lameter
2007-11-20 12:42   ` Mathieu Desnoyers
2007-11-20 20:44     ` Christoph Lameter
2007-11-20 21:23       ` Mathieu Desnoyers
2007-11-20 21:36         ` Christoph Lameter
2007-11-20 21:43           ` Mathieu Desnoyers
2007-11-20  1:11 ` [rfc 05/45] cpu alloc: Remove SLUB fields clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 06/45] cpu alloc: page allocator conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 07/45] cpu_alloc: Implement dynamically extendable cpu areas clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 08/45] cpu alloc: x86 support clameter, Christoph Lameter
2007-11-20  1:35   ` H. Peter Anvin
2007-11-20  2:02     ` Christoph Lameter
2007-11-20  2:18       ` H. Peter Anvin
2007-11-20  3:37       ` Nick Piggin
2007-11-20  3:59       ` Nick Piggin
2007-11-20 12:05         ` Andi Kleen
2007-11-20  3:16   ` Andi Kleen
2007-11-20  3:50     ` Christoph Lameter
2007-11-20 12:01       ` Andi Kleen
2007-11-20 20:35         ` Christoph Lameter
2007-11-20 20:59           ` Andi Kleen
2007-11-20 21:33             ` Christoph Lameter
2007-11-21  0:10               ` Christoph Lameter
2007-11-21  1:16                 ` Christoph Lameter
2007-11-21  1:36                   ` Andi Kleen
2007-11-21  2:08                     ` Christoph Lameter
2007-11-21 13:08                       ` Andi Kleen
2007-11-21 19:01                         ` Christoph Lameter
2007-11-20 20:43         ` H. Peter Anvin
2007-11-20 20:51           ` Andi Kleen
2007-11-20 20:58             ` Christoph Lameter
2007-11-20 21:06               ` H. Peter Anvin
2007-11-20 21:34                 ` Christoph Lameter
2007-11-20 21:01             ` H. Peter Anvin
2007-11-27  4:12         ` John Richard Moser
2007-11-20  1:11 ` [rfc 09/45] cpu alloc: IA64 support clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 10/45] cpu_alloc: Sparc64 support clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 11/45] cpu alloc: percpu_counter conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 12/45] cpu alloc: crash_notes conversion clameter, Christoph Lameter
2007-11-20 13:03   ` Mathieu Desnoyers
2007-11-20 20:50     ` Christoph Lameter
2007-11-20  1:11 ` [rfc 13/45] cpu alloc: workqueue conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 14/45] cpu alloc: ACPI cstate handling conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 15/45] cpu alloc: genhd statistics conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 16/45] cpu alloc: blktrace conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 17/45] cpu alloc: SRCU clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 18/45] cpu alloc: XFS counters clameter, Christoph Lameter
2007-11-20  8:12   ` Christoph Hellwig
2007-11-20 20:38     ` Christoph Lameter
2007-11-21  4:47       ` David Chinner
2007-11-21  4:50         ` Christoph Lameter
2007-11-20  1:11 ` [rfc 19/45] cpu alloc: NFS statistics clameter, Christoph Lameter
2007-11-20 13:02   ` Mathieu Desnoyers
2007-11-20 20:49     ` Christoph Lameter
2007-11-20 20:56       ` Trond Myklebust
2007-11-20 21:28         ` Mathieu Desnoyers
2007-11-20 21:48           ` Trond Myklebust
2007-11-20 21:50             ` Mathieu Desnoyers
2007-11-20 22:46               ` Trond Myklebust
2007-11-21  0:53                 ` Mathieu Desnoyers
2007-11-20 21:26       ` Mathieu Desnoyers
2007-11-20  1:11 ` [rfc 20/45] cpu alloc: neigbour statistics clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 21/45] cpu alloc: tcp statistics clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 22/45] cpu alloc: convert scatches clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 23/45] cpu alloc: dmaengine conversion clameter, Christoph Lameter
2007-11-20 12:50   ` Mathieu Desnoyers
2007-11-20 20:46     ` Christoph Lameter
2007-11-20  1:11 ` [rfc 24/45] cpu alloc: convert loopback statistics clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 25/45] cpu alloc: veth conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 26/45] cpu alloc: Chelsio statistics conversion clameter, Christoph Lameter
2007-11-20  1:11 ` [rfc 27/45] cpu alloc: convert mib handling to cpu alloc clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 28/45] cpu_alloc: convert network sockets clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 29/45] cpu alloc: Use for infiniband clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 30/45] cpu alloc: Use in the crypto subsystem clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 31/45] cpu alloc: Remove the allocpercpu functionality clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 32/45] Module handling: Use CPU_xx ops to dynamically allocate counters clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 33/45] x86_64: Use CPU ops for nmi alert counter clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 34/45] x86_64: Fold percpu area into the cpu area clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 35/45] X86_64: Declare pda as per cpu data thereby moving it " clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 36/45] X86_64: Place pda first in " clameter, Christoph Lameter
2007-11-20  1:12 ` clameter, Christoph Lameter [this message]
2007-11-20  2:00   ` [rfc 37/45] x86_64: Support for fast per cpu operations H. Peter Anvin
2007-11-20  2:03     ` Christoph Lameter
2007-11-20  2:15       ` H. Peter Anvin
2007-11-20  2:17     ` David Miller
2007-11-20  2:19       ` H. Peter Anvin
2007-11-20  3:23         ` Andi Kleen
2007-11-20  2:45     ` Paul Mackerras
2007-11-20  1:12 ` [rfc 38/45] x86_64: Remove obsolete per_cpu offset calculations clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 39/45] x86_64: Remove the data_offset field from the pda clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 40/45] x86_64: Provide per_cpu_var definition clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 41/45] VM statistics: Use CPU ops clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 43/45] x86_64: Add a CPU_OR to support or_pda() clameter, Christoph Lameter
2007-11-20  1:12 ` [rfc 44/45] Remove local_t support clameter, Christoph Lameter
2007-11-20 12:59   ` Mathieu Desnoyers
2007-11-20 20:48     ` Christoph Lameter
2007-11-20  1:12 ` [rfc 45/45] Modules: Hack to handle symbols that have a zero value clameter, Christoph Lameter
2007-11-20  2:20   ` Mathieu Desnoyers
2007-11-20  2:49     ` Christoph Lameter
2007-11-20  3:29       ` Mathieu Desnoyers
2007-11-20  1:18 ` [rfc 00/45] [RFC] CPU ops and a rework of per cpu data handling on x86_64 Christoph Lameter
2007-11-20  1:51 ` David Miller
2007-11-20  1:59   ` Christoph Lameter
2007-11-20  2:10     ` David Miller
2007-11-20  2:12       ` Christoph Lameter
2007-11-20  3:25   ` Andi Kleen
2007-11-20  3:33     ` Christoph Lameter
2007-11-20  4:04     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071120011340.569486037@sgi.com \
    --to=clameter@sgi.com \
    --cc=ak@suse.de \
    --cc=akpm@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.