* [RFC 0/8] x86: Mad WARN() hackery
@ 2025-06-02 14:42 Peter Zijlstra
2025-06-02 14:42 ` [RFC 1/8] x86: Provide assembly __bug_table helpers Peter Zijlstra
` (7 more replies)
0 siblings, 8 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Hi,
I've been annoyed at how WARN() works for quite some time, and Alessandro has
been trying to make it even worse.
This is an attempt at making WARN() generate less crap for the most common
cases. It is somewhat magical, but appears to be working for the simple cases
I've tried. Specifically, it moves the __warn_printf() into the exception for
the case of 3 or less arguments.
This would hopefully allow Allesandro to put the kunit hackery in report_bug()
and not spread it around all the WARN() sites like he does now.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 1/8] x86: Provide assembly __bug_table helpers
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 2/8] bug: Add BUGFLAG_FORMAT infrastructure Peter Zijlstra
` (6 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Rework the __bug_table helpers such that usage from assembly becomes
possible.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/bug.h | 54 ++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 30 deletions(-)
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -32,46 +32,40 @@
#ifdef CONFIG_GENERIC_BUG
#ifdef CONFIG_X86_32
-# define __BUG_REL(val) ".long " __stringify(val)
+#define ASM_BUG_REL(val) .long val
#else
-# define __BUG_REL(val) ".long " __stringify(val) " - ."
+#define ASM_BUG_REL(val) .long val - .
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define ASM_BUGTABLE_VERBOSE(file, line) \
+ ASM_BUG_REL(file) ; \
+ .word line
+#define ASM_BUGTABLE_VERBOSE_SIZE 6 /* sizeof(file) + sizeof(line) */
+#else
+#define ASM_BUGTABLE_VERBOSE(file, line)
+#define ASM_BUGTABLE_VERBOSE_SIZE 0
+#endif
-#define _BUG_FLAGS(ins, flags, extra) \
-do { \
- asm_inline volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
- "\t.word %c1" "\t# bug_entry::line\n" \
- "\t.word %c2" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c3\n" \
- ".popsection\n" \
- extra \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
+#define ASM_BUGTABLE_BASE_SIZE 6 /* sizeof(bug_addr) + sizeof(flags) */
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
+#define ASM_BUGTABLE_FLAGS(at, file, line, flags) \
+ .pushsection __bug_table, "aw" ; \
+ 123: ASM_BUG_REL(at) ; \
+ ASM_BUGTABLE_VERBOSE(file, line) ; \
+ .word flags ; \
+ .org 123b + ASM_BUGTABLE_BASE_SIZE + ASM_BUGTABLE_VERBOSE_SIZE ;\
+ .popsection
-#define _BUG_FLAGS(ins, flags, extra) \
+#define _BUG_FLAGS(insn, flags, extra) \
do { \
- asm_inline volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t.word %c0" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c1\n" \
- ".popsection\n" \
- extra \
- : : "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
+ asm_inline volatile("1:\t" insn "\n" \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[file], %c[line], %c[fl])) "\n" \
+ extra \
+ : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ [fl] "i" (flags)); \
} while (0)
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
#else
#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 2/8] bug: Add BUGFLAG_FORMAT infrastructure
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
2025-06-02 14:42 ` [RFC 1/8] x86: Provide assembly __bug_table helpers Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 3/8] bug: Clean up CONFIG_GENERIC_BUG_RELATIVE_POINTERS Peter Zijlstra
` (5 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Add BUGFLAG_FORMAT; an architecture opt-in feature that allows adding
the WARN_printf() format string to the bug_entry table.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/asm-generic/bug.h | 8 ++++++++
lib/bug.c | 20 ++++++++++++++++++++
2 files changed, 28 insertions(+)
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -13,6 +13,7 @@
#define BUGFLAG_ONCE (1 << 1)
#define BUGFLAG_DONE (1 << 2)
#define BUGFLAG_NO_CUT_HERE (1 << 3) /* CUT_HERE already sent */
+#define BUGFLAG_FORMAT (1 << 4)
#define BUGFLAG_TAINT(taint) ((taint) << 8)
#define BUG_GET_TAINT(bug) ((bug)->flags >> 8)
#endif
@@ -36,6 +37,13 @@ struct bug_entry {
#else
signed int bug_addr_disp;
#endif
+#ifdef HAVE_ARCH_BUG_FORMAT
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+ const char *format;
+#else
+ signed int format_disp;
+#endif
+#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
const char *file;
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -139,6 +139,19 @@ void bug_get_file_line(struct bug_entry
#endif
}
+static const char *bug_get_format(struct bug_entry *bug)
+{
+ const char *format = NULL;
+#ifdef HAVE_ARCH_BUG_FORMAT
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+ format = (const char *)&bug->format_disp + bug->format_disp;
+#else
+ format = bug->format;
+#endif
+#endif
+ return format;
+}
+
struct bug_entry *find_bug(unsigned long bugaddr)
{
struct bug_entry *bug;
@@ -150,6 +163,10 @@ struct bug_entry *find_bug(unsigned long
return module_find_bug(bugaddr);
}
+#ifndef __warn_printf
+#define __warn_printf(...)
+#endif
+
static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
{
struct bug_entry *bug;
@@ -190,6 +207,9 @@ static enum bug_trap_type __report_bug(u
if ((bug->flags & BUGFLAG_NO_CUT_HERE) == 0)
printk(KERN_DEFAULT CUT_HERE);
+ if (bug->flags & BUGFLAG_FORMAT)
+ __warn_printf(bug_get_format(bug), regs);
+
if (warning) {
/* this is a WARN_ON rather than BUG/BUG_ON */
__warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 3/8] bug: Clean up CONFIG_GENERIC_BUG_RELATIVE_POINTERS
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
2025-06-02 14:42 ` [RFC 1/8] x86: Provide assembly __bug_table helpers Peter Zijlstra
2025-06-02 14:42 ` [RFC 2/8] bug: Add BUGFLAG_FORMAT infrastructure Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 4/8] bug: Allow architectures to provide __WARN_printf() Peter Zijlstra
` (4 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Three repeated CONFIG_GENERIC_BUG_RELATIVE_POINTERS #ifdefs right
after one another yields unreadable code. Add a helper.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/asm-generic/bug.h | 22 ++++++++--------------
1 file changed, 8 insertions(+), 14 deletions(-)
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -30,26 +30,20 @@ void __warn(const char *file, int line,
#ifdef CONFIG_BUG
-#ifdef CONFIG_GENERIC_BUG
-struct bug_entry {
#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
- unsigned long bug_addr;
+#define BUG_REL(type, name) type name
#else
- signed int bug_addr_disp;
+#define BUG_REL(type, name) signed int name##_disp
#endif
+
+#ifdef CONFIG_GENERIC_BUG
+struct bug_entry {
+ BUG_REL(unsigned long, bug_addr);
#ifdef HAVE_ARCH_BUG_FORMAT
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
- const char *format;
-#else
- signed int format_disp;
-#endif
+ BUG_REL(const char *, format);
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
- const char *file;
-#else
- signed int file_disp;
-#endif
+ BUG_REL(const char *, file);
unsigned short line;
#endif
unsigned short flags;
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 4/8] bug: Allow architectures to provide __WARN_printf()
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
` (2 preceding siblings ...)
2025-06-02 14:42 ` [RFC 3/8] bug: Clean up CONFIG_GENERIC_BUG_RELATIVE_POINTERS Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 5/8] x86_64/bug: Add BUG_FORMAT basics Peter Zijlstra
` (3 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Instead of providing __WARN_FLAGS(), allow an architecture to provide
__WARN_printf(), which allows for optimizing WARN(), rather than
WARN_ON().
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/asm-generic/bug.h | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -94,14 +94,7 @@ void warn_slowpath_fmt(const char *file,
const char *fmt, ...);
extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#ifndef __WARN_FLAGS
-#define __WARN() __WARN_printf(TAINT_WARN, NULL)
-#define __WARN_printf(taint, arg...) do { \
- instrumentation_begin(); \
- warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \
- instrumentation_end(); \
- } while (0)
-#else
+#if defined(__WARN_FLAGS) && !defined(__WARN_printf)
#define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
#define __WARN_printf(taint, arg...) do { \
instrumentation_begin(); \
@@ -118,6 +111,18 @@ extern __printf(1, 2) void __warn_printk
})
#endif
+#ifndef __WARN_printf
+#define __WARN_printf(taint, arg...) do { \
+ instrumentation_begin(); \
+ warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \
+ instrumentation_end(); \
+ } while (0)
+#endif
+
+#ifndef __WARN
+#define __WARN() __WARN_printf(TAINT_WARN, NULL)
+#endif
+
/* used internally by panic.c */
#ifndef WARN_ON
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 5/8] x86_64/bug: Add BUG_FORMAT basics
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
` (3 preceding siblings ...)
2025-06-02 14:42 ` [RFC 4/8] bug: Allow architectures to provide __WARN_printf() Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 6/8] x86_64/bug: Implement __WARN_printf() Peter Zijlstra
` (2 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Opt-in to BUG_FORMAT for x86_64, adjust the BUGTABLE helper and for
now, just store NULL pointers.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/bug.h | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -31,6 +31,7 @@
#ifdef CONFIG_GENERIC_BUG
+
#ifdef CONFIG_X86_32
#define ASM_BUG_REL(val) .long val
#else
@@ -47,22 +48,35 @@
#define ASM_BUGTABLE_VERBOSE_SIZE 0
#endif
+#ifdef CONFIG_X86_64
+#define HAVE_ARCH_BUG_FORMAT
+#define ASM_BUGTABLE_FORMAT(format) \
+ ASM_BUG_REL(format)
+#define ASM_BUGTABLE_FORMAT_SIZE 4 /* sizeof(format) */
+#else
+#define ASM_BUGTABLE_FORMAT(format)
+#define ASM_BUGTABLE_FORMAT_SIZE 0
+#endif
+
#define ASM_BUGTABLE_BASE_SIZE 6 /* sizeof(bug_addr) + sizeof(flags) */
-#define ASM_BUGTABLE_FLAGS(at, file, line, flags) \
+#define ASM_BUGTABLE_FLAGS(at, format, file, line, flags) \
.pushsection __bug_table, "aw" ; \
123: ASM_BUG_REL(at) ; \
+ ASM_BUGTABLE_FORMAT(format) ; \
ASM_BUGTABLE_VERBOSE(file, line) ; \
.word flags ; \
- .org 123b + ASM_BUGTABLE_BASE_SIZE + ASM_BUGTABLE_VERBOSE_SIZE ;\
+ .org 123b + ASM_BUGTABLE_BASE_SIZE + ASM_BUGTABLE_FORMAT_SIZE + ASM_BUGTABLE_VERBOSE_SIZE ; \
.popsection
#define _BUG_FLAGS(insn, flags, extra) \
do { \
asm_inline volatile("1:\t" insn "\n" \
- __stringify(ASM_BUGTABLE_FLAGS(1b, %c[file], %c[line], %c[fl])) "\n" \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[fmt], %c[file], %c[line], %c[fl])) "\n" \
extra \
- : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ : : [fmt] "i" (NULL), \
+ [file] "i" (__FILE__), \
+ [line] "i" (__LINE__), \
[fl] "i" (flags)); \
} while (0)
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
` (4 preceding siblings ...)
2025-06-02 14:42 ` [RFC 5/8] x86_64/bug: Add BUG_FORMAT basics Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 15:02 ` Linus Torvalds
2025-06-02 14:42 ` [RFC 7/8] x86/bug: Implement WARN_ONCE() Peter Zijlstra
2025-06-02 14:42 ` [RFC 8/8] x86: Clean up default rethunk warning Peter Zijlstra
7 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Optimize __WARN_printf() for the common number of arguments.
By storing the WARN() format string in the bug_entry, it becomes
possible to print this from the exception handler, instead of having
to emit a __warn_printf() call before tripping the exception.
This yields more compact code, while also providing an opportunity to
simplify WARN_ONCE() -- in a future patch.
The only 'minor' detail is how to get the printf() arguments into the
exception handler.
Note that the regular calling convention uses 6 registers to pass
arguments (before pushing to the stack) and observe that UD1 has a
ModRM byte which can encode two registers.
This then suggests using UD1 exceptions for small __WARN_printf()
argument numbers, encoding the registers holding the arguments in the
UD1 instruction.
Except that, when creating a histogram of the number of arguments
given to WARN (including WARN_ONCE):
defconfig: allyesconfig:
899 __COUNT_printf_3 9694 __COUNT_printf_0
795 __COUNT_printf_2 4498 __COUNT_printf_1
360 __COUNT_printf_0 3213 __COUNT_printf_3
212 __COUNT_printf_1 2949 __COUNT_printf_2
86 __COUNT_printf_4 186 __COUNT_printf_4
37 __COUNT_printf_5 84 __COUNT_printf_5
15 __COUNT_printf_6 50 __COUNT_printf_6
14 __COUNT_printf_7 16 __COUNT_printf_7
3 __COUNT_printf_8 3 __COUNT_printf_8
3 __COUNT_printf_12 3 __COUNT_printf_12
2 __COUNT_printf_9
It becomes clear that supporting 3 arguments is rather critical. This
requires encoding 3 registers in an instruction, which calls for some
creativity :-)
Use the form:
UD1 disp8(%ecx), %reg
And encode two registers (one per nibble) in the displacement value.
Use (%ecx) as base, because UBSAN already uses (%eax) to encode its
immediates.
This then yields the following encodings:
nr args: insn:
0 UD2
1 UD1 (%ecx), %reg
2 UD1 %reg2, %reg1
3 UD1 disp8(%ecx), %reg1
Use the normal COUNT_ARGS() trick to split the variadic WARN() macro
into per nr_args sub-marcos, except use a custom mapping such that 4
and above map to another variadic that does the current thing as
fallback.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/bug.h | 115 +++++++++++++++++++++++++++++++++++++
arch/x86/kernel/traps.c | 137 ++++++++++++++++++++++++++++++++++++---------
2 files changed, 225 insertions(+), 27 deletions(-)
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -5,6 +5,7 @@
#include <linux/stringify.h>
#include <linux/instrumentation.h>
#include <linux/objtool.h>
+#include <linux/args.h>
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
@@ -26,6 +27,7 @@
#define BUG_UD2 0xfffe
#define BUG_UD1 0xfffd
#define BUG_UD1_UBSAN 0xfffc
+#define BUG_UD1_WARN 0xfffb
#define BUG_EA 0xffea
#define BUG_LOCK 0xfff0
@@ -108,6 +110,119 @@ do { \
instrumentation_end(); \
} while (0)
+#ifdef HAVE_ARCH_BUG_FORMAT
+
+#ifndef __ASSEMBLER__
+struct pt_regs;
+extern void __warn_printf(const char *fmt, struct pt_regs *regs);
+#define __warn_printf __warn_printf
+#endif
+
+#define __WARN_printf_0(flags, format) \
+do { \
+ __auto_type __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_FORMAT; \
+ instrumentation_begin(); \
+ asm_inline volatile("1: ud2\n" \
+ ANNOTATE_REACHABLE(1b) \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[fmt], %c[file], %c[line], %c[fl])) "\n" \
+ : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ [fl] "i" (__flags), [fmt] "i" (format)); \
+ instrumentation_end(); \
+} while (0)
+
+#define __WARN_printf_1(flags, format, arg1) \
+do { \
+ __auto_type __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_FORMAT; \
+ instrumentation_begin(); \
+ asm_inline volatile("1: ud1 (%%ecx), %[a1]\n" \
+ ANNOTATE_REACHABLE(1b) \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[fmt], %c[file], %c[line], %c[fl])) "\n" \
+ : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ [fl] "i" (__flags), [fmt] "i" (format), \
+ [a1] "r" ((unsigned long)(arg1))); \
+ instrumentation_end(); \
+} while (0)
+
+#define __WARN_printf_2(flags, format, arg1, arg2) \
+do { \
+ __auto_type __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_FORMAT; \
+ instrumentation_begin(); \
+ asm_inline volatile("1: ud1 %[a2], %[a1]\n" \
+ ANNOTATE_REACHABLE(1b) \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[fmt], %c[file], %c[line], %c[fl])) "\n" \
+ : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ [fl] "i" (__flags), [fmt] "i" (format), \
+ [a1] "r" ((unsigned long)(arg1)), \
+ [a2] "r" ((unsigned long)(arg2))); \
+ instrumentation_end(); \
+} while (0)
+
+/*
+ * warn_add_reg var reg -- adds the machine register index to var
+ */
+#define DEFINE_WARN_REG \
+ ".macro warn_add_reg var:req reg:req\n" \
+ ".set .Lfound, 0\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".set \\var, \\var + .Lregnr\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".set \\var, \\var + .Lregnr\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".if (.Lfound != 1)\n" \
+ ".error \"warn_add_reg: bad register argument\"\n" \
+ ".endif\n" \
+ ".endm\n"
+
+#define UNDEFINE_WARN_REG \
+ ".purgem warn_add_reg\n"
+
+#define __WARN_printf_3(flags, format, arg1, arg2, arg3) \
+do { \
+ __auto_type __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_FORMAT; \
+ instrumentation_begin(); \
+ asm_inline volatile( \
+ DEFINE_WARN_REG \
+ ".set warn_imm, 0\n" \
+ "warn_add_reg warn_imm, %[a3]\n" \
+ ".set warn_imm, (warn_imm << 4)\n" \
+ "warn_add_reg warn_imm, %[a2]\n" \
+ UNDEFINE_WARN_REG \
+ "1: ud1 warn_imm(%%ecx),%[a1]\n" \
+ ANNOTATE_REACHABLE(1b) \
+ __stringify(ASM_BUGTABLE_FLAGS(1b, %c[fmt], %c[file], %c[line], %c[fl])) "\n" \
+ : : [file] "i" (__FILE__), [line] "i" (__LINE__), \
+ [fl] "i" (__flags), [fmt] "i" (format), \
+ [a1] "r" ((unsigned long)(arg1)), \
+ [a2] "r" ((unsigned long)(arg2)), \
+ [a3] "r" ((unsigned long)(arg3))); \
+ instrumentation_end(); \
+} while (0)
+
+#define __WARN_printf_n(flags, fmt, arg...) do { \
+ instrumentation_begin(); \
+ __warn_printk(fmt, arg); \
+ __WARN_FLAGS(BUGFLAG_NO_CUT_HERE | (flags)); \
+ instrumentation_end(); \
+ } while (0)
+
+#define WARN_ARGS(X...) __COUNT_ARGS(, ##X, n, n, n, n, n, n, n, n, n, n, n, n, 3, 2, 1, 0)
+
+#define __WARN_printf(taint, fmt, arg...) \
+ CONCATENATE(__WARN_printf_, WARN_ARGS(arg))(BUGFLAG_TAINT(taint), fmt, ## arg)
+
+#endif /* HAVE_ARCH_BUG_FORMAT */
+
#include <asm-generic/bug.h>
#endif /* _ASM_X86_BUG_H */
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -81,18 +81,6 @@
DECLARE_BITMAP(system_vectors, NR_VECTORS);
-__always_inline int is_valid_bugaddr(unsigned long addr)
-{
- if (addr < TASK_SIZE_MAX)
- return 0;
-
- /*
- * We got #UD, if the text isn't readable we'd have gotten
- * a different exception.
- */
- return *(unsigned short *)addr == INSN_UD2;
-}
-
/*
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
* If it's a UD1, further decode to determine its use:
@@ -103,24 +91,49 @@ __always_inline int is_valid_bugaddr(uns
* UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
* static_call: 0f b9 cc ud1 %esp,%ecx
*
- * Notably UBSAN uses EAX, static_call uses ECX.
+ * WARN_printf_0: ud2
+ * WARN_printf_1: ud1 (%ecx),%reg
+ * WARN_printf_2: ud1 %reg,%reg
+ * WARN_printf_3: ud1 0xBA(%ecx),%reg
+ *
+ * Notably UBSAN uses (%eax), static_call uses %esp.
+ *
+ * @regs will return one register per nibble, typically ModRM reg in the low
+ * nibble and ModRM rm in the next nibble (including REX bits). In case of the
+ * WARN_printf_3 case the 8 bit immediate is used to encode two registers and
+ * a total of 3 nibbles will be set.
+ *
+ * @imm will return the immediate value encoded in the instruction, or 0.
+ *
+ * @len will return the length of the instruction decoded.
*/
-__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
+__always_inline int decode_bug(unsigned long addr, u32 *regs, s32 *imm, int *len)
{
unsigned long start = addr;
+ u8 v, rex = 0, reg, rm;
bool lock = false;
- u8 v;
+ int type = BUG_UD1;
if (addr < TASK_SIZE_MAX)
return BUG_NONE;
- v = *(u8 *)(addr++);
- if (v == INSN_ASOP)
+ for (;;) {
v = *(u8 *)(addr++);
- if (v == INSN_LOCK) {
- lock = true;
- v = *(u8 *)(addr++);
+ if (v == INSN_ASOP)
+ continue;
+
+ if (v == INSN_LOCK) {
+ lock = true;
+ continue;
+ }
+
+ if ((v & 0xf0) == 0x40) {
+ rex = v;
+ continue;
+ }
+
+ break;
}
switch (v) {
@@ -141,6 +154,9 @@ __always_inline int decode_bug(unsigned
return BUG_NONE;
}
+ *regs = 0;
+ *imm = 0;
+
v = *(u8 *)(addr++);
if (v == SECOND_BYTE_OPCODE_UD2) {
*len = addr - start;
@@ -150,19 +166,33 @@ __always_inline int decode_bug(unsigned
if (v != SECOND_BYTE_OPCODE_UD1)
return BUG_NONE;
- *imm = 0;
v = *(u8 *)(addr++); /* ModRM */
-
if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4)
addr++; /* SIB */
+ reg = X86_MODRM_REG(v) + 8*!!X86_REX_R(rex);
+ rm = X86_MODRM_RM(v) + 8*!!X86_REX_B(rex);
+ *regs = (rm << 4) | reg;
+
/* Decode immediate, if present */
switch (X86_MODRM_MOD(v)) {
case 0: if (X86_MODRM_RM(v) == 5)
- addr += 4; /* RIP + disp32 */
+ addr += 4; /* RIP + disp32 */
+ if (rm == 1) /* CX */
+ type = BUG_UD1_WARN;
break;
case 1: *imm = *(s8 *)addr;
+ if (rm == 1) { /* CX */
+ /*
+ * The 8bit immediate is used to encode two more
+ * registers, while the rm value is used to encode
+ * this is a UD1_WARN. Munge the immediate into the
+ * regs value such that 3 nibbles are set.
+ */
+ *regs = ((*(u8 *)addr) << 4) | reg;
+ type = BUG_UD1_WARN;
+ }
addr += 1;
break;
@@ -170,18 +200,37 @@ __always_inline int decode_bug(unsigned
addr += 4;
break;
- case 3: break;
+ case 3: if (rm != 4) /* SP */
+ type = BUG_UD1_WARN;
+ break;
}
/* record instruction length */
*len = addr - start;
- if (X86_MODRM_REG(v) == 0) /* EAX */
+ if (!rm && X86_MODRM_MOD(v) != 3) /* (%eax) */
return BUG_UD1_UBSAN;
- return BUG_UD1;
+ return type;
}
+int is_valid_bugaddr(unsigned long addr)
+{
+ int ud_type, ud_len;
+ u32 ud_regs;
+ s32 ud_imm;
+
+ if (addr < TASK_SIZE_MAX)
+ return 0;
+
+ /*
+ * We got #UD, if the text isn't readable we'd have gotten
+ * a different exception.
+ */
+ ud_type = decode_bug(addr, &ud_regs, &ud_imm, &ud_len);
+
+ return ud_type == BUG_UD2 || ud_type == BUG_UD1_WARN;
+}
static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
@@ -305,14 +354,42 @@ static inline void handle_invalid_op(str
ILL_ILLOPN, error_get_trap_addr(regs));
}
+#ifdef HAVE_ARCH_BUG_FORMAT
+static inline unsigned long pt_regs_val(struct pt_regs *regs, int nr)
+{
+ int offset = pt_regs_offset(regs, nr);
+ if (WARN_ON_ONCE(offset < -0))
+ return 0;
+ return *((unsigned long *)((void *)regs + offset));
+}
+
+void __warn_printf(const char *fmt, struct pt_regs *regs)
+{
+ unsigned long a1, a2, a3;
+ u32 r = regs->orig_ax;
+
+ /*
+ * @r is the ud_regs value from decode_bug() which will have at most 3
+ * registers encoded. Notably printk() will ignore any spurious
+ * arguments.
+ */
+ a1 = pt_regs_val(regs, (r >> 0) & 0xf);
+ a2 = pt_regs_val(regs, (r >> 4) & 0xf);
+ a3 = pt_regs_val(regs, (r >> 8) & 0xf);
+
+ printk(fmt, a1, a2, a3);
+}
+#endif
+
static noinstr bool handle_bug(struct pt_regs *regs)
{
unsigned long addr = regs->ip;
bool handled = false;
int ud_type, ud_len;
+ u32 ud_regs;
s32 ud_imm;
- ud_type = decode_bug(addr, &ud_imm, &ud_len);
+ ud_type = decode_bug(addr, &ud_regs, &ud_imm, &ud_len);
if (ud_type == BUG_NONE)
return handled;
@@ -334,7 +411,13 @@ static noinstr bool handle_bug(struct pt
raw_local_irq_enable();
switch (ud_type) {
+ case BUG_UD1_WARN:
case BUG_UD2:
+ /*
+ * #UD does not have an error code, use orig_ax to pass the ud_regs value
+ * to __warn_printf().
+ */
+ regs->orig_ax = ud_regs;
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
handled = true;
break;
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 7/8] x86/bug: Implement WARN_ONCE()
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
` (5 preceding siblings ...)
2025-06-02 14:42 ` [RFC 6/8] x86_64/bug: Implement __WARN_printf() Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
2025-06-02 14:42 ` [RFC 8/8] x86: Clean up default rethunk warning Peter Zijlstra
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Implement WARN_ONCE like WARN using BUGFLAG_ONCE.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/bug.h | 28 ++++++++++++++++++++++++++++
include/asm-generic/bug.h | 2 ++
2 files changed, 30 insertions(+)
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -221,6 +221,34 @@ do { \
#define __WARN_printf(taint, fmt, arg...) \
CONCATENATE(__WARN_printf_, WARN_ARGS(arg))(BUGFLAG_TAINT(taint), fmt, ## arg)
+#define __WONCE_printf_0(flags, format) \
+ __WARN_printf_0(flags|BUGFLAG_ONCE, format)
+
+#define __WONCE_printf_1(flags, format, arg1) \
+ __WARN_printf_1(flags|BUGFLAG_ONCE, format, arg1)
+
+#define __WONCE_printf_2(flags, format, arg1, arg2) \
+ __WARN_printf_2(flags|BUGFLAG_ONCE, format, arg1, arg2)
+
+#define __WONCE_printf_3(flags, format, arg1, arg2, arg3) \
+ __WARN_printf_3(flags|BUGFLAG_ONCE, format, arg1, arg2, arg3)
+
+#define __WONCE_printf_n(flags, format, arg...) \
+ if (__ONCE_LITE_IF(true)) __WARN_printf_n(flags, format, arg)
+
+#define _WARN_ONCE(cond, format, arg...) ({ \
+ int __ret_warn_on = !!(cond); \
+ if (unlikely(__ret_warn_on)) { \
+ CONCATENATE(__WONCE_printf_, WARN_ARGS(arg))(0, format, ## arg); \
+ } \
+ __ret_warn_on; \
+})
+
+/*
+ * Make sure WARN_ONCE() arguments get expanded before trying to count them.
+ */
+#define WARN_ONCE(cond, format...) _WARN_ONCE(cond, format)
+
#endif /* HAVE_ARCH_BUG_FORMAT */
#include <asm-generic/bug.h>
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -155,8 +155,10 @@ extern __printf(1, 2) void __warn_printk
DO_ONCE_LITE_IF(condition, WARN_ON, 1)
#endif
+#ifndef WARN_ONCE
#define WARN_ONCE(condition, format...) \
DO_ONCE_LITE_IF(condition, WARN, 1, format)
+#endif
#define WARN_TAINT_ONCE(condition, taint, format...) \
DO_ONCE_LITE_IF(condition, WARN_TAINT, 1, taint, format)
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC 8/8] x86: Clean up default rethunk warning
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
` (6 preceding siblings ...)
2025-06-02 14:42 ` [RFC 7/8] x86/bug: Implement WARN_ONCE() Peter Zijlstra
@ 2025-06-02 14:42 ` Peter Zijlstra
7 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 14:42 UTC (permalink / raw)
To: x86; +Cc: linux-kernel, peterz, kees, acarmina, jpoimboe, mark.rutland,
torvalds
Replace the funny __warn_thunk thing with a more regular WARN_ONCE()
and simplify the ifdeffery.
Notably this avoids RET from having recursive RETs (once from the
thunk and once from the C function) -- recurive RET makes my head hurt
for no good reason.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/entry/entry.S | 4 ----
arch/x86/include/asm/nospec-branch.h | 2 --
arch/x86/kernel/cpu/bugs.c | 5 -----
arch/x86/lib/retpoline.S | 24 +++++++++++++++++-------
4 files changed, 17 insertions(+), 18 deletions(-)
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -13,8 +13,6 @@
#include <asm/cpufeatures.h>
#include <asm/nospec-branch.h>
-#include "calling.h"
-
.pushsection .noinstr.text, "ax"
/* Clobbers AX, CX, DX */
@@ -53,8 +51,6 @@ EXPORT_SYMBOL_GPL(mds_verw_sel);
.popsection
-THUNK warn_thunk_thunk, __warn_thunk
-
/*
* Clang's implementation of TLS stack cookies requires the variable in
* question to be a TLS variable. If the variable happens to be defined as an
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -386,8 +386,6 @@ extern void clear_bhb_loop(void);
extern void (*x86_return_thunk)(void);
-extern void __warn_thunk(void);
-
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -3415,8 +3415,3 @@ ssize_t cpu_show_indirect_target_selecti
return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
}
#endif
-
-void __warn_thunk(void)
-{
- WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n");
-}
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -12,6 +12,7 @@
#include <asm/percpu.h>
#include <asm/frame.h>
#include <asm/nops.h>
+#include <asm/bug.h>
.section .text..__x86.indirect_thunk
@@ -416,6 +417,21 @@ EXPORT_SYMBOL(its_return_thunk)
#endif /* CONFIG_MITIGATION_ITS */
+ .pushsection .rodata.str1.1
+.Lwarn:
+ .string "Unpatched return thunk in use.\n"
+ .popsection
+
+/*
+ * Helper that will trip WARN_ONCE() after alternatives have ran.
+ */
+#define ALT_WARN_RETHUNK \
+ 1: ALTERNATIVE "", "ud2", X86_FEATURE_ALWAYS ; \
+ ASM_BUGTABLE_FLAGS(1b, .Lwarn, \
+ 0, 0, BUGFLAG_WARNING | \
+ BUGFLAG_ONCE | BUGFLAG_FORMAT) ; \
+ ANNOTATE_REACHABLE
+
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
@@ -432,15 +448,9 @@ EXPORT_SYMBOL(its_return_thunk)
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
-#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \
- defined(CONFIG_MITIGATION_SRSO) || \
- defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)
- ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
- "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
-#else
+ ALT_WARN_RETHUNK
ANNOTATE_UNRET_SAFE
ret
-#endif
int3
SYM_CODE_END(__x86_return_thunk)
SYM_PIC_ALIAS(__x86_return_thunk)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 14:42 ` [RFC 6/8] x86_64/bug: Implement __WARN_printf() Peter Zijlstra
@ 2025-06-02 15:02 ` Linus Torvalds
2025-06-02 15:49 ` Peter Zijlstra
0 siblings, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2025-06-02 15:02 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, 2 Jun 2025 at 07:52, Peter Zijlstra <peterz@infradead.org> wrote:
>
> Use the normal COUNT_ARGS() trick to split the variadic WARN() macro
> into per nr_args sub-marcos, except use a custom mapping such that 4
> and above map to another variadic that does the current thing as
> fallback.
Does this horror work with clang? Because I suspect not. The games you
play with inline asm are too disgusting for words.
But honestly, even if it does,I really hate this kind of insane
complexity for dubious reasons.
If you have a warning that is *so* critical for performance that you
can't deal with the register movement that comes from the compiler
doing this for you, just remove the warning.
Don't make our build system do something this disgusting.
Linus
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 15:02 ` Linus Torvalds
@ 2025-06-02 15:49 ` Peter Zijlstra
2025-06-02 16:38 ` Linus Torvalds
0 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 15:49 UTC (permalink / raw)
To: Linus Torvalds; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, Jun 02, 2025 at 08:02:24AM -0700, Linus Torvalds wrote:
> On Mon, 2 Jun 2025 at 07:52, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Use the normal COUNT_ARGS() trick to split the variadic WARN() macro
> > into per nr_args sub-marcos, except use a custom mapping such that 4
> > and above map to another variadic that does the current thing as
> > fallback.
>
> Does this horror work with clang? Because I suspect not. The games you
> play with inline asm are too disgusting for words.
Yes, it absolutely builds with clang. The inline asm isn't something we
don't already do elsewhere :-) *cough* extable *cough*
> But honestly, even if it does,I really hate this kind of insane
> complexity for dubious reasons.
>
> If you have a warning that is *so* critical for performance that you
> can't deal with the register movement that comes from the compiler
> doing this for you, just remove the warning.
>
> Don't make our build system do something this disgusting.
It isn't just the register movement. What we currently have for WARN()
is:
WARN(cond, fmt, arg...)
if (unlikely(cond)) {
__warn_printf(fmt, arg);
ud2
}
Where the UD2 bug_entry will have NO_CUT_HERE, because __warn_printf()
will have started the printing.
Part of the problem is with unlikely() not causing the text to break out
into .text.unlikely, but at best it moves it to the end of whatever
function we're in.
This also means that if you do WARN_ONCE() the whole ONCE machinery is
also dumped into that function.
And now someone wants to go add some KUnit specific testing to this as
well, which is also dumped into the function.
This is all cruft that shouldn't be in the normal .text.
The horrors I did will change things into:
if (unlikely(cond))
ud1 regs
where the bug_entry will then have the fmt, and the ud1 instruction some
regs (provided 3 or less args). Then all the ONCE and KUnit crap can
live in the exception handler. Not littered around the real code.
Now, I can still relate to: "this is too horrible for words". But then I
would strongly suggest people go poke at the compilers so we can get:
if (really_unlikely(cond)) {
whatever;
}
such that the compiler is forced to split whatever into a cold
sub-function placed in .text.unlikely. Then it doesn't really matter how
much crap we stick in there, it will not affect the normal code paths /
I$.
Anyway, I had fun hacking this up :-)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 15:49 ` Peter Zijlstra
@ 2025-06-02 16:38 ` Linus Torvalds
2025-06-02 18:09 ` Peter Zijlstra
2025-06-02 21:57 ` Peter Zijlstra
0 siblings, 2 replies; 21+ messages in thread
From: Linus Torvalds @ 2025-06-02 16:38 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, 2 Jun 2025 at 08:50, Peter Zijlstra <peterz@infradead.org> wrote:
>
> Yes, it absolutely builds with clang. The inline asm isn't something we
> don't already do elsewhere :-) *cough* extable *cough*
Eww. I hadn't looked at that (or repressed it if I did). *Shudder*.
But looking around, I don't think any of the normal code I ever look
at actually *generate* that disgusting thing.
I had to search for it, and looked at the absolute horror it generates
in the futex code, and honestly, if I ever have to look at that
garbage, I would throw up.
And WARN_ONCE() is in stuff I *do* look at.
So no. I'm NAK'ing it just because it makes the asm look entirely unreadable.
And no, I'm not ok with only using 'objdump' and friends to look at
assembly generation. I want to be able to do
make xyz.s
and look at code generation without throwing up.
The fact that we have this disgusting thing elsewhere in places that
I've not looked at does *not* excuse adding it to other places.
Linus
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 16:38 ` Linus Torvalds
@ 2025-06-02 18:09 ` Peter Zijlstra
2025-06-02 20:04 ` Josh Poimboeuf
2025-06-02 20:16 ` Josh Poimboeuf
2025-06-02 21:57 ` Peter Zijlstra
1 sibling, 2 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 18:09 UTC (permalink / raw)
To: Linus Torvalds; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, Jun 02, 2025 at 09:38:09AM -0700, Linus Torvalds wrote:
> On Mon, 2 Jun 2025 at 08:50, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Yes, it absolutely builds with clang. The inline asm isn't something we
> > don't already do elsewhere :-) *cough* extable *cough*
>
> Eww. I hadn't looked at that (or repressed it if I did). *Shudder*.
>
> But looking around, I don't think any of the normal code I ever look
> at actually *generate* that disgusting thing.
>
> I had to search for it, and looked at the absolute horror it generates
> in the futex code, and honestly, if I ever have to look at that
> garbage, I would throw up.
>
> And WARN_ONCE() is in stuff I *do* look at.
>
> So no. I'm NAK'ing it just because it makes the asm look entirely unreadable.
>
> And no, I'm not ok with only using 'objdump' and friends to look at
> assembly generation. I want to be able to do
>
> make xyz.s
>
> and look at code generation without throwing up.
>
> The fact that we have this disgusting thing elsewhere in places that
> I've not looked at does *not* excuse adding it to other places.
Right. So the problem is using asm macros in inline-asm. We've tried
adding those macros to a global asm, but IIRC that had trouble.
So yeah, now we do the macro definition and purge right around the
inline asm and then you get this horror show :-(
Anyway, I'll try and come up with something else.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 18:09 ` Peter Zijlstra
@ 2025-06-02 20:04 ` Josh Poimboeuf
2025-06-02 20:16 ` Josh Poimboeuf
1 sibling, 0 replies; 21+ messages in thread
From: Josh Poimboeuf @ 2025-06-02 20:04 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Linus Torvalds, x86, linux-kernel, kees, acarmina, mark.rutland
On Mon, Jun 02, 2025 at 08:09:22PM +0200, Peter Zijlstra wrote:
> On Mon, Jun 02, 2025 at 09:38:09AM -0700, Linus Torvalds wrote:
> > On Mon, 2 Jun 2025 at 08:50, Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > Yes, it absolutely builds with clang. The inline asm isn't something we
> > > don't already do elsewhere :-) *cough* extable *cough*
> >
> > Eww. I hadn't looked at that (or repressed it if I did). *Shudder*.
> >
> > But looking around, I don't think any of the normal code I ever look
> > at actually *generate* that disgusting thing.
> >
> > I had to search for it, and looked at the absolute horror it generates
> > in the futex code, and honestly, if I ever have to look at that
> > garbage, I would throw up.
> >
> > And WARN_ONCE() is in stuff I *do* look at.
> >
> > So no. I'm NAK'ing it just because it makes the asm look entirely unreadable.
> >
> > And no, I'm not ok with only using 'objdump' and friends to look at
> > assembly generation. I want to be able to do
> >
> > make xyz.s
> >
> > and look at code generation without throwing up.
> >
> > The fact that we have this disgusting thing elsewhere in places that
> > I've not looked at does *not* excuse adding it to other places.
>
> Right. So the problem is using asm macros in inline-asm. We've tried
> adding those macros to a global asm, but IIRC that had trouble.
>
> So yeah, now we do the macro definition and purge right around the
> inline asm and then you get this horror show :-(
>
> Anyway, I'll try and come up with something else.
At least the purge could be skipped if you check a global assembler
variable before defining the macro.
For example here was one of my experiments trying to making alternatives
more readable:
#define DEFINE_ALT_MACRO \
".ifndef alt; " \
".set alt, 1; " \
__stringify(ALTERNATIVE_MACRO) "; " \
".endif\n\n\t" \
#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, ft_flags_str) \
"\n# <ALTERNATIVE>\n" \
DEFINE_ALT_MACRO \
"alternative \"" oldinstr "\", \"" \
newinstr "\", \"" \
__stringify(ft_flags) "\" /* " ft_flags_str " */" \
"\n# </ALTERNATIVE>\n"
which produced code like:
# <ALTERNATIVE>
.ifndef alt; .set alt, 1; .macro __alt_replace orig, orig_end, newinstr, ft_flags, args:vararg; .skip -(((.Lnew_end_\@ - .Lnew_\@) - (\orig_end - \orig)) > 0) * ((.Lnew_end_\@ - .Lnew_\@) - (\orig_end - \orig)), 0x90; .Lskip_end_\@: .pushsection .altinstr_replacement, "ax"; .Lnew_\@: \newinstr; .Lnew_end_\@: .popsection; .pushsection .altinstructions, "a"; .long \orig - .; .long .Lnew_\@ - .; .4byte \ft_flags; .byte .Lskip_end_\@ - \orig; .byte .Lnew_end_\@ - .Lnew_\@; .popsection; .ifnb \args; __alt_replace \orig, \orig_end, \args; .endif; .endm; .macro alternative oldinstr, args:vararg; .Lorig_\@: \oldinstr; .Lorig_end_\@: __alt_replace .Lorig_\@, .Lorig_end_\@, \args; .endm; .endif
alternative "", "stac", "( 9*32+20)" /* X86_FEATURE_SMAP */
# </ALTERNATIVE>
which is much more readable if you ignore the horrendous first line ;-)
--
Josh
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 18:09 ` Peter Zijlstra
2025-06-02 20:04 ` Josh Poimboeuf
@ 2025-06-02 20:16 ` Josh Poimboeuf
2025-06-02 20:33 ` Andrew Cooper
1 sibling, 1 reply; 21+ messages in thread
From: Josh Poimboeuf @ 2025-06-02 20:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Linus Torvalds, x86, linux-kernel, kees, acarmina, mark.rutland
On Mon, Jun 02, 2025 at 08:09:22PM +0200, Peter Zijlstra wrote:
> Right. So the problem is using asm macros in inline-asm. We've tried
> adding those macros to a global asm, but IIRC that had trouble.
Yeah, IIRC, the problem I ran into was that clang doesn't allow defining
asm macros in global asm().
--
Josh
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 20:16 ` Josh Poimboeuf
@ 2025-06-02 20:33 ` Andrew Cooper
0 siblings, 0 replies; 21+ messages in thread
From: Andrew Cooper @ 2025-06-02 20:33 UTC (permalink / raw)
To: jpoimboe
Cc: acarmina, kees, linux-kernel, mark.rutland, peterz, torvalds, x86
> Yeah, IIRC, the problem I ran into was that clang doesn't allow defining
> asm macros in global asm().
Macros in global asm was fixed in Clang 6.
https://bugs.llvm.org/show_bug.cgi?id=36110
But https://github.com/llvm/llvm-project/issues/60792 is still unfixed
and waiting to trip people up.
~Andrew
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 16:38 ` Linus Torvalds
2025-06-02 18:09 ` Peter Zijlstra
@ 2025-06-02 21:57 ` Peter Zijlstra
2025-06-02 22:01 ` Peter Zijlstra
2025-06-02 23:10 ` Linus Torvalds
1 sibling, 2 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 21:57 UTC (permalink / raw)
To: Linus Torvalds; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, Jun 02, 2025 at 09:38:09AM -0700, Linus Torvalds wrote:
> And no, I'm not ok with only using 'objdump' and friends to look at
> assembly generation. I want to be able to do
>
> make xyz.s
>
> and look at code generation without throwing up.
So if I stuff the asm macro in a global asm() block then GCC ends up
looking like so:
.set warn_imm, 0
warn_add_reg var=warn_imm reg=%rcx # tmp215
.set warn_imm, (warn_imm << 4)
warn_add_reg var=warn_imm reg=%rdx # tmp212
1: ud1 warn_imm(%ecx),%rax # tmp210
.pushsection .discard.annotate_insn,"M",@progbits,8
.long 1b - .
.long 8
.popsection
.pushsection __bug_table, "aw" ; 123: .long 1b - . ; .long .LC76 - . ; .long .LC0 - . ; .word 8710 ; .word 2321 ; .org 123b + 6 + 4 + 6 ; .popsection #,,,
However, clangd is 'helpful' and fully expands the asm macro for the .s
file :-(
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 21:57 ` Peter Zijlstra
@ 2025-06-02 22:01 ` Peter Zijlstra
2025-06-02 23:10 ` Linus Torvalds
1 sibling, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-02 22:01 UTC (permalink / raw)
To: Linus Torvalds; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, Jun 02, 2025 at 11:57:25PM +0200, Peter Zijlstra wrote:
> On Mon, Jun 02, 2025 at 09:38:09AM -0700, Linus Torvalds wrote:
>
> > And no, I'm not ok with only using 'objdump' and friends to look at
> > assembly generation. I want to be able to do
> >
> > make xyz.s
> >
> > and look at code generation without throwing up.
>
> So if I stuff the asm macro in a global asm() block then GCC ends up
> looking like so:
>
> .set warn_imm, 0
> warn_add_reg var=warn_imm reg=%rcx # tmp215
> .set warn_imm, (warn_imm << 4)
> warn_add_reg var=warn_imm reg=%rdx # tmp212
> 1: ud1 warn_imm(%ecx),%rax # tmp210
> .pushsection .discard.annotate_insn,"M",@progbits,8
> .long 1b - .
> .long 8
> .popsection
> .pushsection __bug_table, "aw" ; 123: .long 1b - . ; .long .LC76 - . ; .long .LC0 - . ; .word 8710 ; .word 2321 ; .org 123b + 6 + 4 + 6 ; .popsection #,,,
>
> However, clangd is 'helpful' and fully expands the asm macro for the .s
> file :-(
It generates his horror show:
#APP
.set warn_imm, 0
.set .Lregnr, 0
.set .Lregnr, 1
.set .Lregnr, 2
.set warn_imm, 2
.set .Lregnr, 3
.set .Lregnr, 4
.set .Lregnr, 5
.set .Lregnr, 6
.set .Lregnr, 7
.set .Lregnr, 8
.set .Lregnr, 9
.set .Lregnr, 10
.set .Lregnr, 11
.set .Lregnr, 12
.set .Lregnr, 13
.set .Lregnr, 14
.set .Lregnr, 15
.set .Lregnr, 16
.set .Lregnr, 0
.set .Lregnr, 1
.set .Lregnr, 2
.set .Lregnr, 3
.set .Lregnr, 4
.set .Lregnr, 5
.set .Lregnr, 6
.set .Lregnr, 7
.set .Lregnr, 8
.set .Lregnr, 9
.set .Lregnr, 10
.set .Lregnr, 11
.set .Lregnr, 12
.set .Lregnr, 13
.set .Lregnr, 14
.set .Lregnr, 15
.set .Lregnr, 16
.set warn_imm, 32
.set .Lregnr, 0
.set .Lregnr, 1
.set warn_imm, 33
.set .Lregnr, 2
.set .Lregnr, 3
.set .Lregnr, 4
.set .Lregnr, 5
.set .Lregnr, 6
.set .Lregnr, 7
.set .Lregnr, 8
.set .Lregnr, 9
.set .Lregnr, 10
.set .Lregnr, 11
.set .Lregnr, 12
.set .Lregnr, 13
.set .Lregnr, 14
.set .Lregnr, 15
.set .Lregnr, 16
.set .Lregnr, 0
.set .Lregnr, 1
.set .Lregnr, 2
.set .Lregnr, 3
.set .Lregnr, 4
.set .Lregnr, 5
.set .Lregnr, 6
.set .Lregnr, 7
.set .Lregnr, 8
.set .Lregnr, 9
.set .Lregnr, 10
.set .Lregnr, 11
.set .Lregnr, 12
.set .Lregnr, 13
.set .Lregnr, 14
.set .Lregnr, 15
.set .Lregnr, 16
.Ltmp1656:
ud1q 33(%ecx), %rax
.section .discard.annotate_insn,"M",@progbits,8
.Ltmp1657:
.long .Ltmp1656-.Ltmp1657
.long 8
.section .init.text,"ax",@progbits
.section __bug_table,"aw",@progbits
.Ltmp1658:
.Ltmp1659:
.long .Ltmp1656-.Ltmp1659
.Ltmp1660:
.long .L.str.29-.Ltmp1660
.Ltmp1661:
.long .L.str-.Ltmp1661
.short 8710
.short 2321
.org ((.Ltmp1658+6)+4)+6, 0
.section .init.text,"ax",@progbits
#NO_APP
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 21:57 ` Peter Zijlstra
2025-06-02 22:01 ` Peter Zijlstra
@ 2025-06-02 23:10 ` Linus Torvalds
2025-06-03 13:04 ` Peter Zijlstra
1 sibling, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2025-06-02 23:10 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, 2 Jun 2025 at 14:57, Peter Zijlstra <peterz@infradead.org> wrote:
>
> So if I stuff the asm macro in a global asm() block then GCC ends up
> looking like so:
Better, but as then the clang thing looks like a horrendous disaster.
How about we simply make this all *code* instead of playing games with
register numbers?
Why not just push the arguments by hand on the stack, and make that be
the interface? A 'push %reg' is like a byte or two. And you'd do it in
the cold section, so nobody cares.
And the asm would look somewhat sane, instead of being crazy noise due
to crazy macros.
Or so I imagine, because I didn't actually try it.
Linus
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-02 23:10 ` Linus Torvalds
@ 2025-06-03 13:04 ` Peter Zijlstra
2025-06-03 22:23 ` David Laight
0 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2025-06-03 13:04 UTC (permalink / raw)
To: Linus Torvalds; +Cc: x86, linux-kernel, kees, acarmina, jpoimboe, mark.rutland
On Mon, Jun 02, 2025 at 04:10:16PM -0700, Linus Torvalds wrote:
> On Mon, 2 Jun 2025 at 14:57, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > So if I stuff the asm macro in a global asm() block then GCC ends up
> > looking like so:
>
> Better, but as then the clang thing looks like a horrendous disaster.
>
> How about we simply make this all *code* instead of playing games with
> register numbers?
>
> Why not just push the arguments by hand on the stack, and make that be
> the interface? A 'push %reg' is like a byte or two. And you'd do it in
> the cold section, so nobody cares.
>
> And the asm would look somewhat sane, instead of being crazy noise due
> to crazy macros.
>
> Or so I imagine, because I didn't actually try it.
Yeah, I can make that work.
I've been trying to make __WARN_printk() (or similar) do a tail-call to
a "UD2; RET;" stub. But doing printk() in a function makes GCC generate
wild code that refuses to actually tail-call :/
The next crazy idea was to make a variant of __WARN_printk() that takes
a struct bug_entry * as first argument such that it has access to the
bug entry and then take the trap on the way out (while keeping the
pointer in the first argument) and then have the trap handler complete
things.
That way it would all 'just work'. Except I can't seem to force GCC to
emit that tail-call :-(
I'll prod at it some more.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC 6/8] x86_64/bug: Implement __WARN_printf()
2025-06-03 13:04 ` Peter Zijlstra
@ 2025-06-03 22:23 ` David Laight
0 siblings, 0 replies; 21+ messages in thread
From: David Laight @ 2025-06-03 22:23 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Linus Torvalds, x86, linux-kernel, kees, acarmina, jpoimboe,
mark.rutland
On Tue, 3 Jun 2025 15:04:55 +0200
Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Jun 02, 2025 at 04:10:16PM -0700, Linus Torvalds wrote:
> > On Mon, 2 Jun 2025 at 14:57, Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > So if I stuff the asm macro in a global asm() block then GCC ends up
> > > looking like so:
> >
> > Better, but as then the clang thing looks like a horrendous disaster.
> >
> > How about we simply make this all *code* instead of playing games with
> > register numbers?
> >
> > Why not just push the arguments by hand on the stack, and make that be
> > the interface? A 'push %reg' is like a byte or two. And you'd do it in
> > the cold section, so nobody cares.
> >
> > And the asm would look somewhat sane, instead of being crazy noise due
> > to crazy macros.
> >
> > Or so I imagine, because I didn't actually try it.
>
> Yeah, I can make that work.
>
> I've been trying to make __WARN_printk() (or similar) do a tail-call to
> a "UD2; RET;" stub. But doing printk() in a function makes GCC generate
> wild code that refuses to actually tail-call :/
>
> The next crazy idea was to make a variant of __WARN_printk() that takes
> a struct bug_entry * as first argument such that it has access to the
> bug entry and then take the trap on the way out (while keeping the
> pointer in the first argument) and then have the trap handler complete
> things.
>
> That way it would all 'just work'. Except I can't seem to force GCC to
> emit that tail-call :-(
>
> I'll prod at it some more.
How about a slightly less generic macro, something that could be:
#define WARN_IF(a, op, b, msg) \
if (unlikely((a) op (b)) { \
printf("WARN: %s (%x " #op " %x)\n", \
msg ? msg : "(" #a ") " #op " (" #b ")", (a), (b)); \
}
but could just be:
if (unlikely((a) op (b)) {
asm( " ud2; cmp %0, %1"
" .asciz msg; .asciz #op"
:: "r" (a), "r" (b));
}
So a ud2 followed by a reg-reg compare (should be REX/D16 prefix followed
by '3[89ab] /r') and two strings (literals or addresses).
With a suitable exception table entry.
That saves the problem of a generic printf format while still giving the
values of the variables associated with the failing test (for simple tests).
It should also avoid destroying register assignment for the rest of the
function.
If gcc refuses to do a jump for the 'if (unlikely...))' try adding an 'else'
clause containing an asm comment.
I've done that in the past to convert a backwards conditional jump (predicted taken)
into a forwards jump (predicted not taken) to an unconditional jump to the
actual target.
(I had a very tight clock limit for the 'worst case' path.)
David
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2025-06-03 22:23 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-02 14:42 [RFC 0/8] x86: Mad WARN() hackery Peter Zijlstra
2025-06-02 14:42 ` [RFC 1/8] x86: Provide assembly __bug_table helpers Peter Zijlstra
2025-06-02 14:42 ` [RFC 2/8] bug: Add BUGFLAG_FORMAT infrastructure Peter Zijlstra
2025-06-02 14:42 ` [RFC 3/8] bug: Clean up CONFIG_GENERIC_BUG_RELATIVE_POINTERS Peter Zijlstra
2025-06-02 14:42 ` [RFC 4/8] bug: Allow architectures to provide __WARN_printf() Peter Zijlstra
2025-06-02 14:42 ` [RFC 5/8] x86_64/bug: Add BUG_FORMAT basics Peter Zijlstra
2025-06-02 14:42 ` [RFC 6/8] x86_64/bug: Implement __WARN_printf() Peter Zijlstra
2025-06-02 15:02 ` Linus Torvalds
2025-06-02 15:49 ` Peter Zijlstra
2025-06-02 16:38 ` Linus Torvalds
2025-06-02 18:09 ` Peter Zijlstra
2025-06-02 20:04 ` Josh Poimboeuf
2025-06-02 20:16 ` Josh Poimboeuf
2025-06-02 20:33 ` Andrew Cooper
2025-06-02 21:57 ` Peter Zijlstra
2025-06-02 22:01 ` Peter Zijlstra
2025-06-02 23:10 ` Linus Torvalds
2025-06-03 13:04 ` Peter Zijlstra
2025-06-03 22:23 ` David Laight
2025-06-02 14:42 ` [RFC 7/8] x86/bug: Implement WARN_ONCE() Peter Zijlstra
2025-06-02 14:42 ` [RFC 8/8] x86: Clean up default rethunk warning Peter Zijlstra
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox