* PATCH [0/2] percpu: Local cpu pointer optimizations
@ 2010-09-04 17:21 Brian Gerst
2010-09-04 17:21 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
2010-09-04 17:21 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst
0 siblings, 2 replies; 5+ messages in thread
From: Brian Gerst @ 2010-09-04 17:21 UTC (permalink / raw)
To: tj; +Cc: x86, linux-kernel
This patch set allows arches to provide an optimized way of shifting
a per-cpu pointer for the current cpu.
On an x86-32 defconfig build, these patches save 1304 bytes of text.
text data bss dec hex filename
7298335 717480 1374104 9389919 8f475f vmlinux.orig
7297283 717480 1374104 9388867 8f4343 vmlinux.1
7297031 717480 1374104 9388615 8f4247 vmlinux.2
[PATCH 1/2] x86, percpu: Optimize this_cpu_ptr
[PATCH 2/2] percpu: Optimize __get_cpu_var()
arch/x86/include/asm/percpu.h | 9 +++++++++
include/asm-generic/percpu.h | 14 +++++++++-----
2 files changed, 18 insertions(+), 5 deletions(-)
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr
2010-09-04 17:21 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst
@ 2010-09-04 17:21 ` Brian Gerst
2010-09-05 9:20 ` Tejun Heo
2010-09-04 17:21 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst
1 sibling, 1 reply; 5+ messages in thread
From: Brian Gerst @ 2010-09-04 17:21 UTC (permalink / raw)
To: tj; +Cc: x86, linux-kernel
Allow arches to implement __this_cpu_ptr, and provide an x86 version.
Before:
movq $foo, %rax
movq %gs:this_cpu_off, %rdx
addq %rdx, %rax
After:
movq $foo, %rax
addq %gs:this_cpu_off, %rax
The benefit is doing it in one less instruction and not clobbering
a temporary register.
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
arch/x86/include/asm/percpu.h | 9 +++++++++
include/asm-generic/percpu.h | 9 +++++++--
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cd28f9a..d854438 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -47,6 +47,15 @@
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
+#define __this_cpu_ptr(ptr) \
+({ \
+ typeof(ptr) __ptr = (ptr); \
+ __verify_pcpu_ptr(ptr); \
+ asm volatile("add " __percpu_arg(1) ", %0" \
+ : "+r" (__ptr) \
+ : "m" (this_cpu_off)); \
+ __ptr; \
+})
#else
#define __percpu_arg(x) "%P" #x
#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b5043a9..5820fcb 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
#define __raw_get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
-
+#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#else
+#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#endif
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void setup_per_cpu_areas(void);
--
1.7.2.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr
2010-09-04 17:21 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
@ 2010-09-05 9:20 ` Tejun Heo
0 siblings, 0 replies; 5+ messages in thread
From: Tejun Heo @ 2010-09-05 9:20 UTC (permalink / raw)
To: Brian Gerst; +Cc: x86, linux-kernel
Hello,
On 09/04/2010 07:21 PM, Brian Gerst wrote:
> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> index cd28f9a..d854438 100644
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -47,6 +47,15 @@
> #ifdef CONFIG_SMP
> #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
> #define __my_cpu_offset percpu_read(this_cpu_off)
> +#define __this_cpu_ptr(ptr) \
> +({ \
> + typeof(ptr) __ptr = (ptr); \
> + __verify_pcpu_ptr(ptr); \
> + asm volatile("add " __percpu_arg(1) ", %0" \
> + : "+r" (__ptr) \
> + : "m" (this_cpu_off)); \
> + __ptr; \
> +})
Great, thanks for doing this, but can you please comment on what
__this_cpu_ptr() optimizes above? Something similar to the patch
description but shorter.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/2] percpu: Optimize __get_cpu_var()
2010-09-04 17:21 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst
2010-09-04 17:21 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
@ 2010-09-04 17:21 ` Brian Gerst
1 sibling, 0 replies; 5+ messages in thread
From: Brian Gerst @ 2010-09-04 17:21 UTC (permalink / raw)
To: tj; +Cc: x86, linux-kernel
Redefine __get_cpu_var() using this_cpu_ptr() which can be
arch-optimized.
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
include/asm-generic/percpu.h | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 5820fcb..c6e2c2d 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
*/
#define per_cpu(var, cpu) \
(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
-#define __get_cpu_var(var) \
- (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
-#define __raw_get_cpu_var(var) \
- (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
#ifndef __this_cpu_ptr
#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
@@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
#endif
+#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
+#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
+
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void setup_per_cpu_areas(void);
#endif
--
1.7.2.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* PATCH [0/2] percpu: Local cpu pointer optimizations
@ 2010-09-07 11:41 Brian Gerst
2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
0 siblings, 1 reply; 5+ messages in thread
From: Brian Gerst @ 2010-09-07 11:41 UTC (permalink / raw)
To: tj; +Cc: x86, linux-kernel
This patch set allows arches to provide an optimized way of shifting
a per-cpu pointer for the current cpu.
On an x86-32 defconfig build, these patches save 1304 bytes of text.
text data bss dec hex filename
7298335 717480 1374104 9389919 8f475f vmlinux.orig
7297283 717480 1374104 9388867 8f4343 vmlinux.1
7297031 717480 1374104 9388615 8f4247 vmlinux.2
[PATCH 1/2] x86, percpu: Optimize this_cpu_ptr
[PATCH 2/2] percpu: Optimize __get_cpu_var()
arch/x86/include/asm/percpu.h | 11 +++++++++++
include/asm-generic/percpu.h | 14 +++++++++-----
2 files changed, 20 insertions(+), 5 deletions(-)
v2: Added comments
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr
2010-09-07 11:41 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst
@ 2010-09-07 11:41 ` Brian Gerst
0 siblings, 0 replies; 5+ messages in thread
From: Brian Gerst @ 2010-09-07 11:41 UTC (permalink / raw)
To: tj; +Cc: x86, linux-kernel
Allow arches to implement __this_cpu_ptr, and provide an x86 version.
Before:
movq $foo, %rax
movq %gs:this_cpu_off, %rdx
addq %rdx, %rax
After:
movq $foo, %rax
addq %gs:this_cpu_off, %rax
The benefit is doing it in one less instruction and not clobbering
a temporary register.
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
arch/x86/include/asm/percpu.h | 11 +++++++++++
include/asm-generic/percpu.h | 9 +++++++--
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cd28f9a..845343e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -47,6 +47,17 @@
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
+
+/* shift a per-cpu pointer to the current cpu's version */
+#define __this_cpu_ptr(ptr) \
+({ \
+ typeof(ptr) __ptr = (ptr); \
+ __verify_pcpu_ptr(ptr); \
+ asm volatile("add " __percpu_arg(1) ", %0" \
+ : "+r" (__ptr) \
+ : "m" (this_cpu_off)); \
+ __ptr; \
+})
#else
#define __percpu_arg(x) "%P" #x
#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b5043a9..5820fcb 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
#define __raw_get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
-
+#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#else
+#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#endif
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void setup_per_cpu_areas(void);
--
1.7.2.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-09-07 11:41 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-04 17:21 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst
2010-09-04 17:21 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
2010-09-05 9:20 ` Tejun Heo
2010-09-04 17:21 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst
-- strict thread matches above, loose matches on Subject: below --
2010-09-07 11:41 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst
2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.