* [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
@ 2026-05-13 16:12 Rik van Riel
2026-05-13 16:31 ` Dave Hansen
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Rik van Riel @ 2026-05-13 16:12 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Ingo Molnar, Borislav Petkov, x86, linux-kernel, kernel-team,
Dave Hansen, Andy Lutomirski, Xin Li
exc_page_fault() reads CR2 in the C handler (fault.c), not in the asm
entry stub. An NMI can fire in the window between the CPU setting CR2
and the kernel calling read_cr2(). If the NMI handler runs
KASAN-instrumented code that touches a KASAN shadow for a CEA gap
address (unmapped, PMD=0), the nested fault overwrites CR2. When the
NMI returns, the original handler reads the wrong value.
This manifests as a consistent pattern: CR2 in fffffe0000xxxxxx (CEA
range) with page offset 0x028, regardless of the actual faulting
address. The real fault address is lost.
Fix: move read_cr2() into the asm entry stub via new idtentry_pf /
DEFINE_IDTENTRY_PF macros, reading %cr2 into %rdx immediately after
error_entry (SWAPGS done, registers saved), before any C or KASAN code
runs. Pass the address as a 3rd argument to exc_page_fault().
For FRED, hardware saves the faulting address in event data, so we use
fred_event_data(regs) instead, which has no race.
This reduces the NMI/RC2 race window by 90% -- from hundreds of C
instructions deep in the handler down to ~30 asm instructions in
error_entry.
Found with syzkaller
Assisted-by: Claude:claude-opus-4.7 syzkaller
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/coco/sev/vc-handle.c | 3 +-
arch/x86/entry/entry_64.S | 61 +++++++++++++++++++++++++++++++++
arch/x86/entry/entry_fred.c | 2 +-
arch/x86/include/asm/idtentry.h | 34 +++++++++++++++++-
arch/x86/mm/fault.c | 11 ++++--
5 files changed, 104 insertions(+), 7 deletions(-)
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
index d98b5c08ef00..55362d017cc9 100644
--- a/arch/x86/coco/sev/vc-handle.c
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -113,8 +113,7 @@ void vc_forward_exception(struct es_em_ctxt *ctxt)
exc_invalid_op(ctxt->regs);
break;
case X86_TRAP_PF:
- write_cr2(ctxt->fi.cr2);
- exc_page_fault(ctxt->regs, error_code);
+ exc_page_fault(ctxt->regs, error_code, ctxt->fi.cr2);
break;
case X86_TRAP_AC:
exc_alignment_check(ctxt->regs, error_code);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 42447b1e1dff..a02313909516 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,6 +364,67 @@ _ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm
+/**
+ * idtentry_body_pf - Macro to emit code calling the #PF C function
+ * @cfunc: C function to be called
+ *
+ * Reads CR2 immediately after error_entry completes (PUSH_AND_CLEAR_REGS
+ * and SWAPGS are done) and passes it as the 3rd argument to the C handler.
+ * This minimizes the window where an NMI can clobber CR2 by triggering a
+ * nested fault (e.g., KASAN shadow access on unmapped CEA gap addresses).
+ */
+.macro idtentry_body_pf cfunc
+
+ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+ "call xen_error_entry", X86_FEATURE_XENPV
+
+ ENCODE_FRAME_POINTER
+ UNWIND_HINT_REGS
+
+ /*
+ * Read CR2 immediately. error_entry has completed SWAPGS (if
+ * needed) so per-cpu state is accessible, but more importantly
+ * we read CR2 before calling any C code that might be
+ * KASAN-instrumented or otherwise trigger nested faults.
+ */
+ movq %cr2, %rdx
+
+ movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+
+ /* %rdx already has CR2 value as 3rd argument */
+
+ /* For some configurations \cfunc ends up being a noreturn. */
+ ANNOTATE_REACHABLE
+ call \cfunc
+
+ jmp error_return
+.endm
+
+/**
+ * idtentry_pf - Entry point for #PF (page fault)
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * Page fault specific entry macro that reads CR2 as early as possible
+ * to avoid NMI-induced CR2 clobbering. Hardware pushes the error code.
+ */
+.macro idtentry_pf vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+
+ UNWIND_HINT_IRET_ENTRY offset=8
+ ENDBR
+ ASM_CLAC
+ cld
+
+ idtentry_body_pf \cfunc
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+
/*
* Interrupt entry/exit.
*
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index fbe2d10dd737..3a1fa438535b 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -191,7 +191,7 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
{
/* Optimize for #PF. That's the only exception which matters performance wise */
if (likely(regs->fred_ss.vector == X86_TRAP_PF))
- return exc_page_fault(regs, error_code);
+ return exc_page_fault(regs, error_code, fred_event_data(regs));
switch (regs->fred_ss.vector) {
case X86_TRAP_DE: return exc_divide_error(regs);
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 42bf6a58ec36..6c5cbb858817 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -180,6 +180,35 @@ noinstr void fred_##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \
__visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
+/**
+ * DECLARE_IDTENTRY_PF - Declare functions for page fault IDT entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Same as DECLARE_IDTENTRY_RAW_ERRORCODE() but with an extra address
+ * argument. The address (CR2 or FRED event data) is read as early as
+ * possible in the asm entry stub to minimize the window where an NMI
+ * can clobber CR2 by triggering a nested fault.
+ */
+#define DECLARE_IDTENTRY_PF(vector, func) \
+ asmlinkage void asm_##func(void); \
+ asmlinkage void xen_asm_##func(void); \
+ __visible void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
+/**
+ * DEFINE_IDTENTRY_PF - Emit code for page fault IDT entry point
+ * @func: Function name of the entry point
+ *
+ * Same as DEFINE_IDTENTRY_RAW_ERRORCODE() but with an extra address
+ * argument containing the faulting address (from CR2 or FRED event data).
+ */
+#define DEFINE_IDTENTRY_PF(func) \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
/**
* DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry
* points (common/spurious)
@@ -489,6 +518,9 @@ void fred_install_sysvec(unsigned int vector, const idtentry_t function);
#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
DECLARE_IDTENTRY_ERRORCODE(vector, func)
+#define DECLARE_IDTENTRY_PF(vector, func) \
+ idtentry_pf vector asm_##func func
+
/* Entries for common/spurious (device) interrupts */
#define DECLARE_IDTENTRY_IRQ(vector, func) \
idtentry_irq vector func
@@ -615,7 +647,7 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check);
/* Raw exception entries which need extra work */
DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
-DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
+DECLARE_IDTENTRY_PF(X86_TRAP_PF, exc_page_fault);
#if defined(CONFIG_IA32_EMULATION)
DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b83a06739b51..f0e2341e551f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1480,12 +1480,17 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
local_irq_disable();
}
-DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
+DEFINE_IDTENTRY_PF(exc_page_fault)
{
irqentry_state_t state;
- unsigned long address;
- address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
+ /*
+ * For FRED, the faulting address is saved atomically by hardware
+ * as event data. Override the asm-provided CR2 value which is
+ * meaningless under FRED.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ address = fred_event_data(regs);
/*
* KVM uses #PF vector to deliver 'page not present' events to guests
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 16:12 [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window Rik van Riel
@ 2026-05-13 16:31 ` Dave Hansen
2026-05-13 16:43 ` Rik van Riel
2026-05-14 18:09 ` kernel test robot
2026-05-14 22:54 ` kernel test robot
2 siblings, 1 reply; 8+ messages in thread
From: Dave Hansen @ 2026-05-13 16:31 UTC (permalink / raw)
To: Rik van Riel, Thomas Gleixner
Cc: Ingo Molnar, Borislav Petkov, x86, linux-kernel, kernel-team,
Dave Hansen, Andy Lutomirski, Xin Li
On 5/13/26 09:12, Rik van Riel wrote:
> This reduces the NMI/RC2 race window by 90% -- from hundreds of C
> instructions deep in the handler down to ~30 asm instructions in
> error_entry.
I'm all for using the FRED data. That's a no-brainer.
But I'm not sure the assembly and plumbing is worth it just to _reduce_
but not eliminate a race for non-FRED systems.
What's the actual end-user-visible fallout from the race? Don't we just
see the KASAN fault, skip it, IRET from the #PF and repeat the #PF?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 16:31 ` Dave Hansen
@ 2026-05-13 16:43 ` Rik van Riel
2026-05-13 17:12 ` Dave Hansen
0 siblings, 1 reply; 8+ messages in thread
From: Rik van Riel @ 2026-05-13 16:43 UTC (permalink / raw)
To: Dave Hansen, Thomas Gleixner
Cc: Ingo Molnar, Borislav Petkov, x86, linux-kernel, kernel-team,
Dave Hansen, Andy Lutomirski, Xin Li
On Wed, 2026-05-13 at 09:31 -0700, Dave Hansen wrote:
> On 5/13/26 09:12, Rik van Riel wrote:
> > This reduces the NMI/RC2 race window by 90% -- from hundreds of C
> > instructions deep in the handler down to ~30 asm instructions in
> > error_entry.
>
> I'm all for using the FRED data. That's a no-brainer.
>
> But I'm not sure the assembly and plumbing is worth it just to
> _reduce_
> but not eliminate a race for non-FRED systems.
>
> What's the actual end-user-visible fallout from the race? Don't we
> just
> see the KASAN fault, skip it, IRET from the #PF and repeat the #PF?
That's a good question.
I have only "seen" this in syzkaller, so I don't know
what the symptoms would look like if a regular system
hit this race window.
I would not be surprised if we occasionally hit this
in production, but given that I don't know what the
symptoms would look like, I have no idea how often.
--
All Rights Reversed.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 16:43 ` Rik van Riel
@ 2026-05-13 17:12 ` Dave Hansen
2026-05-14 19:49 ` Andrew Cooper
0 siblings, 1 reply; 8+ messages in thread
From: Dave Hansen @ 2026-05-13 17:12 UTC (permalink / raw)
To: Rik van Riel, Thomas Gleixner
Cc: Ingo Molnar, Borislav Petkov, x86, linux-kernel, kernel-team,
Dave Hansen, Andy Lutomirski, Xin Li
On 5/13/26 09:43, Rik van Riel wrote:
...
> I have only "seen" this in syzkaller, so I don't know
> what the symptoms would look like if a regular system
> hit this race window.
>
> I would not be surprised if we occasionally hit this
> in production, but given that I don't know what the
> symptoms would look like, I have no idea how often.
I don't think there's a concrete enough problem being solved here to
justify adding complexity to the entry code. Especially since it's not
even a robust solution for the problem.
The FRED thing is simple, robust, and doesn't affect the actual (nasty)
entry assembly.
I think we should do the FRED bits and continue to ignore the pre-FRED
problem.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 16:12 [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window Rik van Riel
2026-05-13 16:31 ` Dave Hansen
@ 2026-05-14 18:09 ` kernel test robot
2026-05-14 22:54 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2026-05-14 18:09 UTC (permalink / raw)
To: Rik van Riel, Thomas Gleixner
Cc: oe-kbuild-all, Ingo Molnar, Borislav Petkov, x86, linux-kernel,
kernel-team, Dave Hansen, Andy Lutomirski, Xin Li
Hi Rik,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/master]
[also build test ERROR on tip/x86/asm tip/x86/core linus/master v7.1-rc3 next-20260508]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Rik-van-Riel/x86-entry-Read-CR2-in-asm-entry-stub-to-redcue-NMI-clobbering-window/20260514-162554
base: tip/master
patch link: https://lore.kernel.org/r/20260513121259.0aae7adc%40fangorn
patch subject: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
config: i386-allnoconfig (https://download.01.org/0day-ci/archive/20260515/202605150207.pvBEOHK3-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260515/202605150207.pvBEOHK3-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202605150207.pvBEOHK3-lkp@intel.com/
All errors (new ones prefixed by >>):
arch/x86/include/asm/idtentry.h: Assembler messages:
>> arch/x86/include/asm/idtentry.h:627: Error: no such instruction: `idtentry_pf 14 asm_exc_page_fault exc_page_fault'
vim +627 arch/x86/include/asm/idtentry.h
623
624 /* Raw exception entries which need extra work */
625 DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
626 DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
> 627 DECLARE_IDTENTRY_PF(X86_TRAP_PF, exc_page_fault);
628
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 17:12 ` Dave Hansen
@ 2026-05-14 19:49 ` Andrew Cooper
2026-05-14 19:55 ` Dave Hansen
0 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2026-05-14 19:49 UTC (permalink / raw)
To: dave.hansen
Cc: Andrew Cooper, bp, dave.hansen, kernel-team, linux-kernel, luto,
mingo, riel, tglx, x86, xin
> I don't think there's a concrete enough problem being solved here to
> justify adding complexity to the entry code. Especially since it's not
> even a robust solution for the problem.
Well, corrupting CR2 is a real problem.
The very best case is that fixing the pagefault at the wrong CR2 is
benign, then rerun the faulting instruction having not fixed it up, and
get a repeat fault which this time wins the race with the NMI and
doesn't corrupt CR2, and gets fixed up properly.
It's not just CR2 which is a problem; DR6 and XFD_ERR. (the other
registers which have automatic side effects from exception delivery).
Really, the NMI handler needs to preserve the registers it potentially
clobbers, and that still requires the NMI handler doing it in assembly
before entering C.
~Andrew
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-14 19:49 ` Andrew Cooper
@ 2026-05-14 19:55 ` Dave Hansen
0 siblings, 0 replies; 8+ messages in thread
From: Dave Hansen @ 2026-05-14 19:55 UTC (permalink / raw)
To: Andrew Cooper
Cc: bp, dave.hansen, kernel-team, linux-kernel, luto, mingo, riel,
tglx, x86, xin
On 5/14/26 12:49, Andrew Cooper wrote:
> Really, the NMI handler needs to preserve the registers it potentially
> clobbers, and that still requires the NMI handler doing it in assembly
> before entering C.
Yup, totally agree.
It's not rocket science, and it's what we do for CR3 because of PTI, but
it's also no fun. So I didn't want to suggest it.
I mean, we've lived with this behavior _forever_, right? So how bad
could it be?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
2026-05-13 16:12 [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window Rik van Riel
2026-05-13 16:31 ` Dave Hansen
2026-05-14 18:09 ` kernel test robot
@ 2026-05-14 22:54 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2026-05-14 22:54 UTC (permalink / raw)
To: Rik van Riel, Thomas Gleixner
Cc: llvm, oe-kbuild-all, Ingo Molnar, Borislav Petkov, x86,
linux-kernel, kernel-team, Dave Hansen, Andy Lutomirski, Xin Li
Hi Rik,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/master]
[also build test ERROR on tip/x86/asm tip/x86/core linus/master v7.1-rc3 next-20260508]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Rik-van-Riel/x86-entry-Read-CR2-in-asm-entry-stub-to-redcue-NMI-clobbering-window/20260514-162554
base: tip/master
patch link: https://lore.kernel.org/r/20260513121259.0aae7adc%40fangorn
patch subject: [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window
config: i386-buildonly-randconfig-006-20260515 (https://download.01.org/0day-ci/archive/20260515/202605150623.VcCzav1N-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260515/202605150623.VcCzav1N-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202605150623.VcCzav1N-lkp@intel.com/
All errors (new ones prefixed by >>):
>> arch/x86/include/asm/idtentry.h:627:16: error: unexpected token in argument list
idtentry_pf 14 asm_exc_page_fault exc_page_fault;
^
vim +627 arch/x86/include/asm/idtentry.h
623
624 /* Raw exception entries which need extra work */
625 DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
626 DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
> 627 DECLARE_IDTENTRY_PF(X86_TRAP_PF, exc_page_fault);
628
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2026-05-14 22:58 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-13 16:12 [PATCH] x86/entry: Read CR2 in asm entry stub to redcue NMI clobbering window Rik van Riel
2026-05-13 16:31 ` Dave Hansen
2026-05-13 16:43 ` Rik van Riel
2026-05-13 17:12 ` Dave Hansen
2026-05-14 19:49 ` Andrew Cooper
2026-05-14 19:55 ` Dave Hansen
2026-05-14 18:09 ` kernel test robot
2026-05-14 22:54 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.