* PATCH [0/2] percpu: Local cpu pointer optimizations @ 2010-09-07 11:41 Brian Gerst 2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst 2010-09-07 11:41 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst 0 siblings, 2 replies; 4+ messages in thread From: Brian Gerst @ 2010-09-07 11:41 UTC (permalink / raw) To: tj; +Cc: x86, linux-kernel This patch set allows arches to provide an optimized way of shifting a per-cpu pointer for the current cpu. On an x86-32 defconfig build, these patches save 1304 bytes of text. text data bss dec hex filename 7298335 717480 1374104 9389919 8f475f vmlinux.orig 7297283 717480 1374104 9388867 8f4343 vmlinux.1 7297031 717480 1374104 9388615 8f4247 vmlinux.2 [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr [PATCH 2/2] percpu: Optimize __get_cpu_var() arch/x86/include/asm/percpu.h | 11 +++++++++++ include/asm-generic/percpu.h | 14 +++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) v2: Added comments ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr 2010-09-07 11:41 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst @ 2010-09-07 11:41 ` Brian Gerst 2010-09-09 16:19 ` [PATCH UPDATED " Tejun Heo 2010-09-07 11:41 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst 1 sibling, 1 reply; 4+ messages in thread From: Brian Gerst @ 2010-09-07 11:41 UTC (permalink / raw) To: tj; +Cc: x86, linux-kernel Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/percpu.h | 11 +++++++++++ include/asm-generic/percpu.h | 9 +++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index cd28f9a..845343e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -47,6 +47,17 @@ #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) + +/* shift a per-cpu pointer to the current cpu's version */ +#define __this_cpu_ptr(ptr) \ +({ \ + typeof(ptr) __ptr = (ptr); \ + __verify_pcpu_ptr(ptr); \ + asm volatile("add " __percpu_arg(1) ", %0" \ + : "+r" (__ptr) \ + : "m" (this_cpu_off)); \ + __ptr; \ +}) #else #define __percpu_arg(x) "%P" #x #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9..5820fcb 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - 
+#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); -- 1.7.2.2 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH UPDATED 1/2] x86, percpu: Optimize this_cpu_ptr 2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst @ 2010-09-09 16:19 ` Tejun Heo 0 siblings, 0 replies; 4+ messages in thread From: Tejun Heo @ 2010-09-09 16:19 UTC (permalink / raw) To: Brian Gerst; +Cc: x86, linux-kernel Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. tj: beefed up the comment a bit and renamed in-macro temp variable to match neighboring macros. Signed-off-by: Brian Gerst <brgerst@gmail.com> Signed-off-by: Tejun Heo <tj@kernel.org> --- Applied both patches to the following branch. I updated the first patch slightly. git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git for-next Thank you. arch/x86/include/asm/percpu.h | 14 ++++++++++++++ include/asm-generic/percpu.h | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) Index: percpu/arch/x86/include/asm/percpu.h =================================================================== --- percpu.orig/arch/x86/include/asm/percpu.h +++ percpu/arch/x86/include/asm/percpu.h @@ -47,6 +47,20 @@ #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) + +/* + * Compared to the generic __my_cpu_offset version, the following + * saves one instruction and avoids clobbering a temp register. 
+ */ +#define __this_cpu_ptr(ptr) \ +({ \ + typeof(ptr) tcp_ptr__ = (ptr); \ + __verify_pcpu_ptr(ptr); \ + asm volatile("add " __percpu_arg(1) ", %0" \ + : "+r" (tcp_ptr__) \ + : "m" (this_cpu_off)); \ + tcp_ptr__; \ +}) #else #define __percpu_arg(x) "%P" #x #endif Index: percpu/include/asm-generic/percpu.h =================================================================== --- percpu.orig/include/asm-generic/percpu.h +++ percpu/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 2/2] percpu: Optimize __get_cpu_var() 2010-09-07 11:41 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst 2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst @ 2010-09-07 11:41 ` Brian Gerst 1 sibling, 0 replies; 4+ messages in thread From: Brian Gerst @ 2010-09-07 11:41 UTC (permalink / raw) To: tj; +Cc: x86, linux-kernel Redefine __get_cpu_var() using this_cpu_ptr() which can be arch-optimized. Signed-off-by: Brian Gerst <brgerst@gmail.com> --- include/asm-generic/percpu.h | 7 +++---- 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 5820fcb..c6e2c2d 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) #endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -- 1.7.2.2 ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-09-09 16:19 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2010-09-07 11:41 PATCH [0/2] percpu: Local cpu pointer optimizations Brian Gerst 2010-09-07 11:41 ` [PATCH 1/2] x86, percpu: Optimize this_cpu_ptr Brian Gerst 2010-09-09 16:19 ` [PATCH UPDATED " Tejun Heo 2010-09-07 11:41 ` [PATCH 2/2] percpu: Optimize __get_cpu_var() Brian Gerst
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.