* [PATCH v4 1/8] x86/runtime-const: Introduce runtime_const_mask_32()
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 2/8] arm64/runtime-const: Use aarch64_insn_patch_text_nosync() for patching K Prateek Nayak
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Borislav Petkov, Dave Hansen, x86
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, H. Peter Anvin, Thomas Huth, Sean Christopherson
From: Peter Zijlstra <peterz@infradead.org>
Futex hash computation requires a mask operation with read-only after
init data that will be converted to a runtime constant in the subsequent
commit.
Introduce runtime_const_mask_32 to further optimize the mask operation
in the futex hash computation hot path.
[ prateek: Broke off the x86 chunk, commit message. ]
Link: https://patch.msgid.link/20260227161841.GH606826@noisy.programming.kicks-ass.net
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o No changes.
---
arch/x86/include/asm/runtime-const.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 4cd94fdcb45e2..b13f7036c1c9b 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -41,6 +41,15 @@
:"+r" (__ret)); \
__ret; })
+#define runtime_const_mask_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("and $0x12345678, %k0\n1:\n" \
+ ".pushsection runtime_mask_" #sym ",\"a\"\n\t"\
+ ".long 1b - 4 - .\n" \
+ ".popsection" \
+ : "+r" (__ret)); \
+ __ret; })
+
#define runtime_const_init(type, sym) do { \
extern s32 __start_runtime_##type##_##sym[]; \
extern s32 __stop_runtime_##type##_##sym[]; \
@@ -65,6 +74,11 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
*(unsigned char *)where = val;
}
+static inline void __runtime_fixup_mask(void *where, unsigned long val)
+{
+ *(unsigned int *)where = val;
+}
+
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
unsigned long val, s32 *start, s32 *end)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 2/8] arm64/runtime-const: Use aarch64_insn_patch_text_nosync() for patching
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 1/8] x86/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 3/8] arm64/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Catalin Marinas, Will Deacon
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, Jisheng Zhang
The current scheme to directly patch the kernel text for runtime
constants runs into the following issue with futex adapted to using
runtime constants on arm64:
Unable to handle kernel write to read-only memory at virtual address ...
The pc points to the *p assignment in the following call chain:
futex_init()
runtime_const_init(shift, __futex_shift)
__runtime_fixup_shift()
*p = cpu_to_le32(insn);
which suggests that core_initcall() is too late to patch the kernel text
directly unlike the "d_hash_shift" which is initialized during
vfs_caches_init_early() before the protections are in place.
Use aarch64_insn_patch_text_nosync() to patch the runtime constants
instead of doing it directly to allow runtime_const_init() slightly
later into the boot.
Since aarch64_insn_patch_text_nosync() calls caches_clean_inval_pou()
internally, __runtime_fixup_caches() ends up being redundant.
runtime_const_init() are rare and the overheads of multiple calls to
caches_clean_inval_pou() instead of batching them together should be
negligible in practice.
The cpu_to_le32() conversion of instruction isn't necessary since it is
handled later in the aarch64_insn_patch_text_nosync() call-chain:
aarch64_insn_patch_text_nosync(addr, insn)
aarch64_insn_write(addr, insn)
__aarch64_insn_write(addr, cpu_to_le32(insn))
Sashiko noted that aarch64_insn_patch_text_nosync() does not expect a
lm_alias() address and Catalin suggested it is safe to drop the
lm_alias() for runtime patching since the kernel text is readable. The
address passed to fixup function is interpreted as a __le32 and
dereferenced as is to read the opcode at the patch site.
No functional changes are intended.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o Dropped the lm_alias() and use the patch location as is for
aarch64_insn_patch_text_nosync(). (Sashiko, Catalin)
---
arch/arm64/include/asm/runtime-const.h | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
index c3dbd3ae68f69..838145bc289d2 100644
--- a/arch/arm64/include/asm/runtime-const.h
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -7,6 +7,7 @@
#endif
#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
/* Sigh. You can still run arm64 in BE mode */
#include <asm/byteorder.h>
@@ -50,34 +51,26 @@ static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
u32 insn = le32_to_cpu(*p);
insn &= 0xffe0001f;
insn |= (val & 0xffff) << 5;
- *p = cpu_to_le32(insn);
-}
-
-static inline void __runtime_fixup_caches(void *where, unsigned int insns)
-{
- unsigned long va = (unsigned long)where;
- caches_clean_inval_pou(va, va + 4*insns);
+ aarch64_insn_patch_text_nosync(p, insn);
}
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
- __le32 *p = lm_alias(where);
+ __le32 *p = where;
__runtime_fixup_16(p, val);
__runtime_fixup_16(p+1, val >> 16);
__runtime_fixup_16(p+2, val >> 32);
__runtime_fixup_16(p+3, val >> 48);
- __runtime_fixup_caches(where, 4);
}
/* Immediate value is 6 bits starting at bit #16 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
- __le32 *p = lm_alias(where);
+ __le32 *p = where;
u32 insn = le32_to_cpu(*p);
insn &= 0xffc0ffff;
insn |= (val & 63) << 16;
- *p = cpu_to_le32(insn);
- __runtime_fixup_caches(where, 1);
+ aarch64_insn_patch_text_nosync(p, insn);
}
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 3/8] arm64/runtime-const: Introduce runtime_const_mask_32()
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 1/8] x86/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 2/8] arm64/runtime-const: Use aarch64_insn_patch_text_nosync() for patching K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 4/8] riscv/runtime-const: Replace open-coded placeholder with RUNTIME_MAGIC K Prateek Nayak
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Catalin Marinas, Will Deacon
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, Jisheng Zhang
Futex hash computation requires a mask operation with read-only after
init data that will be converted to a runtime constant in the subsequent
commit.
Introduce runtime_const_mask_32 to further optimize the mask operation
in the futex hash computation hot path. GCC generates a:
movz w1, #lo16, lsl #0 // w1 = bits [15:0]
movk w1, #hi16, lsl #16 // w1 = full 32-bit value
and w0, w0, w1 // w0 = w0 & w1
pattern to tackle arbitrary 32-bit masks and the same was also suggested
by Claude which is implemented here. The final (__ret & mask) operation
is intentiaonally placed outside of asm block to allow compilers to
further optimize it if possible.
__runtime_fixup_ptr() already patches a "movz, + movk lsl #16" sequence
which has been reused to patch the same sequence for
__runtime_fixup_mask().
Assisted-by: Claude:claude-sonnet-4-5
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o Reverted back to using __ret as the macro variable to prevent
collision with local variables at callsite. (Sashiko)
o Separated out the & operation to prevent any confusion with operator
precedence if "val" is an expression. (Sashiko)
---
arch/arm64/include/asm/runtime-const.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
index 838145bc289d2..1db4faac8c373 100644
--- a/arch/arm64/include/asm/runtime-const.h
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -36,6 +36,18 @@
:"r" (0u+(val))); \
__ret; })
+#define runtime_const_mask_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "movz %w0, #0xcdef\n\t" \
+ "movk %w0, #0x89ab, lsl #16\n\t" \
+ ".pushsection runtime_mask_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret &= val; /* Allow compiler to optimize & op. */ \
+ __ret; })
+
#define runtime_const_init(type, sym) do { \
extern s32 __start_runtime_##type##_##sym[]; \
extern s32 __stop_runtime_##type##_##sym[]; \
@@ -73,6 +85,13 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
aarch64_insn_patch_text_nosync(p, insn);
}
+static inline void __runtime_fixup_mask(void *where, unsigned long val)
+{
+ __le32 *p = where;
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+}
+
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
unsigned long val, s32 *start, s32 *end)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 4/8] riscv/runtime-const: Replace open-coded placeholder with RUNTIME_MAGIC
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
` (2 preceding siblings ...)
2026-04-30 9:47 ` [PATCH v4 3/8] arm64/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 5/8] riscv/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Guo Ren
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, Alexandre Ghiti, Charlie Jenkins, Jisheng Zhang,
Charles Mirabile
Define the placeholder used for lui + addi[w] patching sequence as
RUNTIME_MAGIC and use that instead of open coding the constants in the
inline assembly.
No functional changes intended.
Suggested-by: Guo Ren <guoren@kernel.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o New patch based on suggestions from Guo. (Thank you!)
---
arch/riscv/include/asm/runtime-const.h | 38 ++++++++++++++------------
1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h
index 900db0a103d05..1ce02605d2e43 100644
--- a/arch/riscv/include/asm/runtime-const.h
+++ b/arch/riscv/include/asm/runtime-const.h
@@ -15,21 +15,23 @@
#include <linux/uaccess.h>
+#define RUNTIME_MAGIC __ASM_STR(0x89ABCDEF)
+
#ifdef CONFIG_32BIT
-#define runtime_const_ptr(sym) \
-({ \
- typeof(sym) __ret; \
- asm_inline(".option push\n\t" \
- ".option norvc\n\t" \
- "1:\t" \
- "lui %[__ret],0x89abd\n\t" \
- "addi %[__ret],%[__ret],-0x211\n\t" \
- ".option pop\n\t" \
- ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
- ".long 1b - .\n\t" \
- ".popsection" \
- : [__ret] "=r" (__ret)); \
- __ret; \
+#define runtime_const_ptr(sym) \
+({ \
+ typeof(sym) __ret; \
+ asm_inline(".option push\n\t" \
+ ".option norvc\n\t" \
+ "1:\t" \
+ "lui %[__ret], %%hi(" RUNTIME_MAGIC ")\n\t" \
+ "addi %[__ret],%[__ret], %%lo(" RUNTIME_MAGIC ")\n\t" \
+ ".option pop\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ : [__ret] "=r" (__ret)); \
+ __ret; \
})
#else
/*
@@ -46,10 +48,10 @@
".option push\n\t" \
".option norvc\n\t" \
"1:\t" \
- "lui %[__ret],0x89abd\n\t" \
- "lui %[__tmp],0x1234\n\t" \
- "addiw %[__ret],%[__ret],-0x211\n\t" \
- "addiw %[__tmp],%[__tmp],0x567\n\t" \
+ "lui %[__ret], %%hi(" RUNTIME_MAGIC ")\n\t" \
+ "lui %[__tmp], %%hi(" RUNTIME_MAGIC ")\n\t" \
+ "addiw %[__ret],%[__ret], %%lo(" RUNTIME_MAGIC ")\n\t" \
+ "addiw %[__tmp],%[__tmp], %%lo(" RUNTIME_MAGIC ")\n\t" \
#define RISCV_RUNTIME_CONST_64_BASE \
"slli %[__tmp],%[__tmp],32\n\t" \
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 5/8] riscv/runtime-const: Introduce runtime_const_mask_32()
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
` (3 preceding siblings ...)
2026-04-30 9:47 ` [PATCH v4 4/8] riscv/runtime-const: Replace open-coded placeholder with RUNTIME_MAGIC K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 6/8] s390/runtime-const: " K Prateek Nayak
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Guo Ren
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, Alexandre Ghiti, Charlie Jenkins, Jisheng Zhang,
Charles Mirabile
Futex hash computation requires a mask operation with read-only after
init data that will be converted to a runtime constant in the subsequent
commit.
Introduce runtime_const_mask_32 to further optimize the mask operation
in the futex hash computation hot path. GCC generates a:
lui a0, 0x12346 # upper; +0x800 then >>12 for correct rounding
addi a0, a0, 0x678 # lower 12 bits
and a1, a1, a0 # a1 = a1 & a0
pattern to tackle arbitrary 32-bit masks and the same was also suggested
by Claude which is implemented here. The final (__ret & val) operation
is intentionally placed outside of asm block to allow compilers to
further optimize it if possible.
__runtime_fixup_ptr() already patches a "lui + addi" sequence which has
been reused to patch the same sequence for __runtime_fixup_mask().
Assisted-by: Claude:claude-sonnet-4-5
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o Reverted back to using __ret as the macro variable to prevent
collision with local variables at callsite. (Sashiko)
o Separated out the & operation to prevent any confusion with operator
precedence if "val" is an expression. (Sashiko)
---
arch/riscv/include/asm/runtime-const.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h
index 1ce02605d2e43..684641cb0fe82 100644
--- a/arch/riscv/include/asm/runtime-const.h
+++ b/arch/riscv/include/asm/runtime-const.h
@@ -159,6 +159,23 @@
__ret; \
})
+#define runtime_const_mask_32(val, sym) \
+({ \
+ u32 __ret; \
+ asm_inline(".option push\n\t" \
+ ".option norvc\n\t" \
+ "1:\t" \
+ "lui %[__ret], %%hi(" RUNTIME_MAGIC ")\n\t" \
+ "addi %[__ret],%[__ret], %%lo(" RUNTIME_MAGIC ")\n\t" \
+ ".option pop\n\t" \
+ ".pushsection runtime_mask_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ : [__ret] "=r" (__ret)); \
+ __ret &= val; /* Allow compiler to optimize & operation. */ \
+ __ret; \
+})
+
#define runtime_const_init(type, sym) do { \
extern s32 __start_runtime_##type##_##sym[]; \
extern s32 __stop_runtime_##type##_##sym[]; \
@@ -262,6 +279,12 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
mutex_unlock(&text_mutex);
}
+static inline void __runtime_fixup_mask(void *where, unsigned long val)
+{
+ __runtime_fixup_32(where, where + 4, val);
+ __runtime_fixup_caches(where, 2);
+}
+
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
unsigned long val, s32 *start, s32 *end)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 6/8] s390/runtime-const: Introduce runtime_const_mask_32()
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
` (4 preceding siblings ...)
2026-04-30 9:47 ` [PATCH v4 5/8] riscv/runtime-const: Introduce runtime_const_mask_32() K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 7/8] asm-generic/runtime-const: Add dummy runtime_const_mask_32() K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 8/8] futex: Use runtime constants for __futex_hash() hot path K Prateek Nayak
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Heiko Carstens, Vasily Gorbik,
Alexander Gordeev
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, Christian Borntraeger, Sven Schnelle
Futex hash computation requires a mask operation with read-only after
init data that will be converted to a runtime constant in the subsequent
commit.
Introduce runtime_const_mask_32 to further optimize the mask operation
in the futex hash computation hot path.
GCC generates a:
nilf %r1,<imm32>
to tackle arbitrary 32-bit masks and the same is implemented here.
Immediate patching pattern for __runtime_fixup_mask() has been adopted
from __runtime_fixup_ptr().
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o No changes.
---
arch/s390/include/asm/runtime-const.h | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h
index 17878b1d048cf..7b71156031ecb 100644
--- a/arch/s390/include/asm/runtime-const.h
+++ b/arch/s390/include/asm/runtime-const.h
@@ -33,6 +33,20 @@
__ret; \
})
+#define runtime_const_mask_32(val, sym) \
+({ \
+ unsigned int __ret = (val); \
+ \
+ asm_inline( \
+ "0: nilf %[__ret],12\n" \
+ ".pushsection runtime_mask_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "+d" (__ret) \
+ : : "cc"); \
+ __ret; \
+})
+
#define runtime_const_init(type, sym) do { \
extern s32 __start_runtime_##type##_##sym[]; \
extern s32 __stop_runtime_##type##_##sym[]; \
@@ -43,12 +57,12 @@
__stop_runtime_##type##_##sym); \
} while (0)
-/* 32-bit immediate for iihf and iilf in bits in I2 field */
static inline void __runtime_fixup_32(u32 *p, unsigned int val)
{
s390_kernel_write(p, &val, sizeof(val));
}
+/* 32-bit immediate for iihf and iilf in bits in I2 field */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
__runtime_fixup_32(where + 2, val >> 32);
@@ -65,6 +79,12 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
s390_kernel_write(where, &insn, sizeof(insn));
}
+/* 32-bit immediate for nilf in bits in I2 field */
+static inline void __runtime_fixup_mask(void *where, unsigned long val)
+{
+ __runtime_fixup_32(where + 2, val);
+}
+
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
unsigned long val, s32 *start, s32 *end)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 7/8] asm-generic/runtime-const: Add dummy runtime_const_mask_32()
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
` (5 preceding siblings ...)
2026-04-30 9:47 ` [PATCH v4 6/8] s390/runtime-const: " K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
2026-04-30 9:47 ` [PATCH v4 8/8] futex: Use runtime constants for __futex_hash() hot path K Prateek Nayak
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Arnd Bergmann
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak
From: Peter Zijlstra <peterz@infradead.org>
Add a dummy runtime_const_mask_32() for all the architectures that do
not support runtime-const.
Link: https://patch.msgid.link/20260227161841.GH606826@noisy.programming.kicks-ass.net
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o No changes.
---
include/asm-generic/runtime-const.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h
index 6704994595145..03e6e3e02401e 100644
--- a/include/asm-generic/runtime-const.h
+++ b/include/asm-generic/runtime-const.h
@@ -10,6 +10,7 @@
*/
#define runtime_const_ptr(sym) (sym)
#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
+#define runtime_const_mask_32(val, sym) ((u32)(val)&(sym))
#define runtime_const_init(type,sym) do { } while (0)
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 8/8] futex: Use runtime constants for __futex_hash() hot path
2026-04-30 9:47 [PATCH v4 0/8] futex: Use runtime constants for futex_hash computation K Prateek Nayak
` (6 preceding siblings ...)
2026-04-30 9:47 ` [PATCH v4 7/8] asm-generic/runtime-const: Add dummy runtime_const_mask_32() K Prateek Nayak
@ 2026-04-30 9:47 ` K Prateek Nayak
7 siblings, 0 replies; 9+ messages in thread
From: K Prateek Nayak @ 2026-04-30 9:47 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Peter Zijlstra,
Sebastian Andrzej Siewior, Borislav Petkov, Dave Hansen, x86,
Catalin Marinas, Will Deacon, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
Arnd Bergmann, Guo Ren
Cc: Darren Hart, Davidlohr Bueso, André Almeida, linux-arch,
linux-kernel, linux-s390, linux-riscv, linux-arm-kernel,
K Prateek Nayak, H. Peter Anvin, Thomas Huth, Sean Christopherson,
Jisheng Zhang, Alexandre Ghiti, Charlie Jenkins, Charles Mirabile,
Christian Borntraeger, Sven Schnelle
From: Peter Zijlstra <peterz@infradead.org>
Runtime constify the read-only after init data __futex_shift(shift_32),
__futex_mask(mask_32), and __futex_queues(ptr) used in __futex_hash()
hot path to avoid referencing global variable.
This also allows __futex_queues to be allocated dynamically to
"nr_node_ids" slots instead of reserving config dependent MAX_NUMNODES
(1 << CONFIG_NODES_SHIFT) worth of slots upfront.
Runtime constants are initialized before their first access and
runtime_const_init() provides necessary barrier to ensure subsequent
accesses are not reordered against their initialization.
No functional changes intended.
[ prateek: Dynamically allocate __futex_queues, mark the global data
__ro_after_init since they are constified after futex_init(). ]
Link: https://patch.msgid.link/20260227161841.GH606826@noisy.programming.kicks-ass.net
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> # MAX_NUMNODES bloat
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
changelog v3..v4:
o Added a small note on runtime_const_init() in the commit log based on
the concerns highlighted by Sashiko. No changes to the diff.
---
include/asm-generic/vmlinux.lds.h | 5 +++-
kernel/futex/core.c | 42 +++++++++++++++++--------------
2 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 60c8c22fd3e44..e80987d8016cc 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -970,7 +970,10 @@
RUNTIME_CONST(ptr, __dentry_cache) \
RUNTIME_CONST(ptr, __names_cache) \
RUNTIME_CONST(ptr, __filp_cache) \
- RUNTIME_CONST(ptr, __bfilp_cache)
+ RUNTIME_CONST(ptr, __bfilp_cache) \
+ RUNTIME_CONST(shift, __futex_shift) \
+ RUNTIME_CONST(mask, __futex_mask) \
+ RUNTIME_CONST(ptr, __futex_queues)
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
#define KUNIT_TABLE() \
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index ff2a4fb2993f0..73eade7184dc2 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -45,23 +45,19 @@
#include <linux/mempolicy.h>
#include <linux/mmap_lock.h>
+#include <asm/runtime-const.h>
+
#include "futex.h"
#include "../locking/rtmutex_common.h"
-/*
- * The base of the bucket array and its size are always used together
- * (after initialization only in futex_hash()), so ensure that they
- * reside in the same cacheline.
- */
-static struct {
- unsigned long hashmask;
- unsigned int hashshift;
- struct futex_hash_bucket *queues[MAX_NUMNODES];
-} __futex_data __read_mostly __aligned(2*sizeof(long));
+static u32 __futex_mask __ro_after_init;
+static u32 __futex_shift __ro_after_init;
+static struct futex_hash_bucket **__futex_queues __ro_after_init;
-#define futex_hashmask (__futex_data.hashmask)
-#define futex_hashshift (__futex_data.hashshift)
-#define futex_queues (__futex_data.queues)
+static __always_inline struct futex_hash_bucket **futex_queues(void)
+{
+ return runtime_const_ptr(__futex_queues);
+}
struct futex_private_hash {
int state;
@@ -439,14 +435,14 @@ __futex_hash(union futex_key *key, struct futex_private_hash *fph)
* NOTE: this isn't perfectly uniform, but it is fast and
* handles sparse node masks.
*/
- node = (hash >> futex_hashshift) % nr_node_ids;
+ node = runtime_const_shift_right_32(hash, __futex_shift) % nr_node_ids;
if (!node_possible(node)) {
node = find_next_bit_wrap(node_possible_map.bits,
nr_node_ids, node);
}
}
- return &futex_queues[node][hash & futex_hashmask];
+ return &futex_queues()[node][runtime_const_mask_32(hash, __futex_mask)];
}
/**
@@ -1916,7 +1912,7 @@ int futex_hash_allocate_default(void)
* 16 <= threads * 4 <= global hash size
*/
buckets = roundup_pow_of_two(4 * threads);
- buckets = clamp(buckets, 16, futex_hashmask + 1);
+ buckets = clamp(buckets, 16, __futex_mask + 1);
if (current_buckets >= buckets)
return 0;
@@ -1986,10 +1982,19 @@ static int __init futex_init(void)
hashsize = max(4, hashsize);
hashsize = roundup_pow_of_two(hashsize);
#endif
- futex_hashshift = ilog2(hashsize);
+ __futex_mask = hashsize - 1;
+ __futex_shift = ilog2(hashsize);
size = sizeof(struct futex_hash_bucket) * hashsize;
order = get_order(size);
+ __futex_queues = kcalloc(nr_node_ids, sizeof(*__futex_queues), GFP_KERNEL);
+
+ runtime_const_init(shift, __futex_shift);
+ runtime_const_init(mask, __futex_mask);
+ runtime_const_init(ptr, __futex_queues);
+
+ BUG_ON(!futex_queues());
+
for_each_node(n) {
struct futex_hash_bucket *table;
@@ -2003,10 +2008,9 @@ static int __init futex_init(void)
for (i = 0; i < hashsize; i++)
futex_hash_bucket_init(&table[i], NULL);
- futex_queues[n] = table;
+ futex_queues()[n] = table;
}
- futex_hashmask = hashsize - 1;
pr_info("futex hash table entries: %lu (%lu bytes on %d NUMA nodes, total %lu KiB, %s).\n",
hashsize, size, num_possible_nodes(), size * num_possible_nodes() / 1024,
order > MAX_PAGE_ORDER ? "vmalloc" : "linear");
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread