* [PATCH -tip v3] x86/percpu: Introduce const-qualified const_pcpu_hot
@ 2023-10-23 14:30 Uros Bizjak
2023-10-25 12:57 ` kernel test robot
0 siblings, 1 reply; 2+ messages in thread
From: Uros Bizjak @ 2023-10-23 14:30 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Andy Lutomirski, Brian Gerst, Denys Vlasenko,
Ingo Molnar, H . Peter Anvin, Linus Torvalds, Peter Zijlstra,
Thomas Gleixner, Josh Poimboeuf, Nadav Amit
Some variables in pcpu_hot, currently current_task and top_of_stack,
are actually per-thread variables implemented as per-cpu variables
and thus stable for the duration of the respective task. There is
already an attempt to eliminate redundant reads from these variables
using this_cpu_read_stable() asm macro, which hides the dependency
on the read memory address. However, the compiler has limited ability
to eliminate asm common subexpressions, so this approach results in
limited success.
The solution is to allow more aggressive elimination by aliasing
pcpu_hot into a const-qualified const_pcpu_hot, and to read stable
per-cpu variables from this constant copy.
The current per-cpu infrastructure does not support reads from
const-qualified variables. However, when the compiler supports segment
qualifiers, it is possible to declare the const-aliased variable in
the relevant named address space. The compiler considers access to the
variable, declared in this way, as a read from a constant location,
and will optimize reads from the variable accordingly.
By implementing constant-qualified const_pcpu_hot, the compiler can
eliminate redundant reads from the constant variables, reducing the
number of loads from current_task from 3766 to 3217 on a test build,
a 14.6% reduction.
The reduction of loads translates to the following code savings:
text data bss dec hex filename
25477353 4389456 808452 30675261 1d4113d vmlinux-old.o
25476074 4389440 808452 30673966 1d40c2e vmlinux-new.o
representing a code size reduction of 1279 bytes.
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Co-developed-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Export const_pcpu_hot symbol.
---
v3: Rename const_pcpu_hot to const__pcpu_hot.
Return const__pcpu_hot.top_of_stack from current_top_of_stack().
---
arch/x86/include/asm/current.h | 7 +++++++
arch/x86/include/asm/percpu.h | 6 +++---
arch/x86/include/asm/processor.h | 3 +++
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/vmlinux.lds.S | 1 +
include/linux/compiler.h | 2 +-
6 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index a1168e7b69e5..6d8384f6d62d 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -36,8 +36,15 @@ static_assert(sizeof(struct pcpu_hot) == 64);
DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
+/* const-qualified alias to pcpu_hot, aliased by linker. */
+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+ const__pcpu_hot);
+
static __always_inline struct task_struct *get_current(void)
{
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return const__pcpu_hot.current_task;
+
return this_cpu_read_stable(pcpu_hot.current_task);
}
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index bbcc1ca737f0..630bb912a46b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -413,9 +413,9 @@ do { \
* accessed while this_cpu_read_stable() allows the value to be cached.
* this_cpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across cpus. The current users include
- * get_current() and get_thread_info() both of which are actually
- * per-thread variables implemented as per-cpu variables and thus
- * stable for the duration of the respective task.
+ * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
+ * actually per-thread variables implemented as per-cpu variables and
+ * thus stable for the duration of the respective task.
*/
#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ae81a7191c1c..dd10f35f2af6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -533,6 +533,9 @@ static __always_inline unsigned long current_top_of_stack(void)
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return const__pcpu_hot.top_of_stack;
+
return this_cpu_read_stable(pcpu_hot.top_of_stack);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b14fc8c1c953..d24e92ad3283 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2049,6 +2049,7 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
.top_of_stack = TOP_OF_INIT_STACK,
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+EXPORT_PER_CPU_SYMBOL(const__pcpu_hot);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 54a5596adaa6..4afbadbf0ab4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ ENTRY(phys_startup_64)
#endif
jiffies = jiffies_64;
+const__pcpu_hot = pcpu_hot;
#if defined(CONFIG_X86_64)
/*
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d7779a18b24f..bf9815eaf4aa 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -212,7 +212,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define ___ADDRESSABLE(sym, __attrs) \
static void * __used __attrs \
- __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
+ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))
--
2.41.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH -tip v3] x86/percpu: Introduce const-qualified const_pcpu_hot
2023-10-23 14:30 [PATCH -tip v3] x86/percpu: Introduce const-qualified const_pcpu_hot Uros Bizjak
@ 2023-10-25 12:57 ` kernel test robot
0 siblings, 0 replies; 2+ messages in thread
From: kernel test robot @ 2023-10-25 12:57 UTC (permalink / raw)
To: Uros Bizjak; +Cc: oe-kbuild-all
Hi Uros,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/x86/core]
[also build test ERROR on dennis-percpu/for-next tip/auto-latest linus/master v6.6-rc7]
[cannot apply to tip/master next-20231025]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Uros-Bizjak/x86-percpu-Introduce-const-qualified-const_pcpu_hot/20231023-223358
base: tip/x86/core
patch link: https://lore.kernel.org/r/20231023143100.4014-1-ubizjak%40gmail.com
patch subject: [PATCH -tip v3] x86/percpu: Introduce const-qualified const_pcpu_hot
config: x86_64-buildonly-randconfig-001-20231025 (https://download.01.org/0day-ci/archive/20231025/202310252024.tnKhQHDe-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231025/202310252024.tnKhQHDe-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310252024.tnKhQHDe-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from include/asm-generic/percpu.h:7,
from arch/x86/include/asm/percpu.h:410,
from arch/x86/include/asm/preempt.h:6,
from include/linux/preempt.h:79,
from include/linux/spinlock.h:56,
from include/linux/swait.h:7,
from include/linux/completion.h:12,
from include/linux/crypto.h:15,
from arch/x86/kernel/asm-offsets.c:9:
>> arch/x86/include/asm/current.h:40:47: error: expected ')' before '__percpu_seg_override'
40 | DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
| ^~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:101:45: note: in definition of macro 'DECLARE_PER_CPU_SECTION'
101 | extern __PCPU_ATTRS(sec) __typeof__(type) name
| ^~~~
arch/x86/include/asm/current.h:40:1: note: in expansion of macro 'DECLARE_PER_CPU_ALIGNED'
40 | DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
| ^~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:101:44: note: to match this '('
101 | extern __PCPU_ATTRS(sec) __typeof__(type) name
| ^
include/linux/percpu-defs.h:147:9: note: in expansion of macro 'DECLARE_PER_CPU_SECTION'
147 | DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
| ^~~~~~~~~~~~~~~~~~~~~~~
arch/x86/include/asm/current.h:40:1: note: in expansion of macro 'DECLARE_PER_CPU_ALIGNED'
40 | DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
| ^~~~~~~~~~~~~~~~~~~~~~~
make[3]: *** [scripts/Makefile.build:116: arch/x86/kernel/asm-offsets.s] Error 1 shuffle=3533711212
make[3]: Target 'prepare' not remade because of errors.
make[2]: *** [Makefile:1202: prepare0] Error 2 shuffle=3533711212
make[2]: Target 'prepare' not remade because of errors.
make[1]: *** [Makefile:234: __sub-make] Error 2 shuffle=3533711212
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:234: __sub-make] Error 2 shuffle=3533711212
make: Target 'prepare' not remade because of errors.
vim +40 arch/x86/include/asm/current.h
38
39 /* const-qualified alias to pcpu_hot, aliased by linker. */
> 40 DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
41 const__pcpu_hot);
42
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-10-25 12:58 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2023-10-23 14:30 [PATCH -tip v3] x86/percpu: Introduce const-qualified const_pcpu_hot Uros Bizjak
2023-10-25 12:57 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.