* [PATCH v2 1/5] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h>
2026-03-23 20:59 [PATCH v2 0/5] Fuller TDX kexec support Vishal Verma
@ 2026-03-23 20:59 ` Vishal Verma
2026-03-24 9:49 ` Chao Gao
2026-03-23 20:59 ` [PATCH v2 2/5] x86/virt/tdx: Pull kexec cache flush logic into arch/x86 Vishal Verma
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Vishal Verma @ 2026-03-23 20:59 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini
Cc: linux-kernel, linux-coco, kvm, Vishal Verma, Kiryl Shutsemau,
Kiryl Shutsemau
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Today there are two separate locations where TDX error codes are defined:
arch/x86/include/asm/tdx.h
arch/x86/kvm/vmx/tdx_errno.h
They have some overlap that is already defined similarly. Reduce the
duplication and prepare to introduce some helpers for these error codes in
the central place by unifying them. Join them at:
asm/shared/tdx_errno.h
...and update the headers that contained the duplicated definitions to
include the new unified header.
"asm/shared" is used for sharing TDX code between the early compressed
code and the normal kernel code. While the compressed code for the guest
doesn't use these error code header definitions today, it does make the
types of calls that return the values they define. So place the defines in
"shared" location so that it can, but leave such cleanups for future
changes.
Also, adjust _BITUL() -> _BITULL() and _UL() -> _ULL() to address 32 bit
build errors after the move.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
[enhance log]
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
arch/x86/include/asm/shared/tdx.h | 1 +
.../{kvm/vmx => include/asm/shared}/tdx_errno.h | 28 +++++++++++++++++-----
arch/x86/include/asm/tdx.h | 21 ----------------
arch/x86/kvm/vmx/tdx.h | 1 -
4 files changed, 23 insertions(+), 28 deletions(-)
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 8bc074c8d7c6..6a1646fc2b2f 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -4,6 +4,7 @@
#include <linux/bits.h>
#include <linux/types.h>
+#include <asm/shared/tdx_errno.h>
#define TDX_HYPERCALL_STANDARD 0
diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h
similarity index 64%
rename from arch/x86/kvm/vmx/tdx_errno.h
rename to arch/x86/include/asm/shared/tdx_errno.h
index 6ff4672c4181..8bf6765cf082 100644
--- a/arch/x86/kvm/vmx/tdx_errno.h
+++ b/arch/x86/include/asm/shared/tdx_errno.h
@@ -1,14 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* architectural status code for SEAMCALL */
-
-#ifndef __KVM_X86_TDX_ERRNO_H
-#define __KVM_X86_TDX_ERRNO_H
+#ifndef _ASM_X86_SHARED_TDX_ERRNO_H
+#define _ASM_X86_SHARED_TDX_ERRNO_H
+#include <asm/trapnr.h>
+/* Upper 32 bit of the TDX error code encodes the status */
#define TDX_SEAMCALL_STATUS_MASK 0xFFFFFFFF00000000ULL
/*
- * TDX SEAMCALL Status Codes (returned in RAX)
+ * TDX Status Codes (returned in RAX)
*/
+#define TDX_SUCCESS 0ULL
#define TDX_NON_RECOVERABLE_VCPU 0x4000000100000000ULL
#define TDX_NON_RECOVERABLE_TD 0x4000000200000000ULL
#define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE 0x6000000500000000ULL
@@ -17,6 +18,7 @@
#define TDX_OPERAND_INVALID 0xC000010000000000ULL
#define TDX_OPERAND_BUSY 0x8000020000000000ULL
#define TDX_PREVIOUS_TLB_EPOCH_BUSY 0x8000020100000000ULL
+#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
#define TDX_PAGE_METADATA_INCORRECT 0xC000030000000000ULL
#define TDX_VCPU_NOT_ASSOCIATED 0x8000070200000000ULL
#define TDX_KEY_GENERATION_FAILED 0x8000080000000000ULL
@@ -28,6 +30,20 @@
#define TDX_EPT_ENTRY_STATE_INCORRECT 0xC0000B0D00000000ULL
#define TDX_METADATA_FIELD_NOT_READABLE 0xC0000C0200000000ULL
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate Reserved status code class that never used by
+ * TDX module.
+ */
+#define TDX_ERROR _BITULL(63)
+#define TDX_NON_RECOVERABLE _BITULL(62)
+#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _ULL(0xFFFF0000))
+
+#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP)
+#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD)
+
/*
* TDX module operand ID, appears in 31:0 part of error code as
* detail information
@@ -37,4 +53,4 @@
#define TDX_OPERAND_ID_SEPT 0x92
#define TDX_OPERAND_ID_TD_EPOCH 0xa9
-#endif /* __KVM_X86_TDX_ERRNO_H */
+#endif /* _ASM_X86_SHARED_TDX_ERRNO_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index a149740b24e8..2917b3451491 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -9,29 +9,8 @@
#include <asm/errno.h>
#include <asm/ptrace.h>
-#include <asm/trapnr.h>
#include <asm/shared/tdx.h>
-/*
- * SW-defined error codes.
- *
- * Bits 47:40 == 0xFF indicate Reserved status code class that never used by
- * TDX module.
- */
-#define TDX_ERROR _BITUL(63)
-#define TDX_NON_RECOVERABLE _BITUL(62)
-#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
-#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000))
-
-#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP)
-#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD)
-
-/*
- * TDX module SEAMCALL leaf function error codes
- */
-#define TDX_SUCCESS 0ULL
-#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
-
#ifndef __ASSEMBLER__
#include <uapi/asm/mce.h>
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index b5cd2ffb303e..ac8323a68b16 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -3,7 +3,6 @@
#define __KVM_X86_VMX_TDX_H
#include "tdx_arch.h"
-#include "tdx_errno.h"
#ifdef CONFIG_KVM_INTEL_TDX
#include "common.h"
--
2.53.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v2 1/5] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h>
2026-03-23 20:59 ` [PATCH v2 1/5] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h> Vishal Verma
@ 2026-03-24 9:49 ` Chao Gao
0 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2026-03-24 9:49 UTC (permalink / raw)
To: Vishal Verma
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini, linux-kernel, linux-coco, kvm
On Mon, Mar 23, 2026 at 02:59:04PM -0600, Vishal Verma wrote:
>From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
>
>Today there are two separate locations where TDX error codes are defined:
> arch/x86/include/asm/tdx.h
> arch/x86/kvm/vmx/tdx_errno.h
>
>They have some overlap that is already defined similarly. Reduce the
>duplication and prepare to introduce some helpers for these error codes in
>the central place by unifying them. Join them at:
> asm/shared/tdx_errno.h
>...and update the headers that contained the duplicated definitions to
>include the new unified header.
>
>"asm/shared" is used for sharing TDX code between the early compressed
>code and the normal kernel code. While the compressed code for the guest
>doesn't use these error code header definitions today, it does make the
>types of calls that return the values they define. So place the defines in
>"shared" location so that it can, but leave such cleanups for future
>changes.
>
>Also, adjust BITUL() -> _BITULL() to address 32 bit build errors after the
>move.
>
>Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>[enhance log]
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 2/5] x86/virt/tdx: Pull kexec cache flush logic into arch/x86
2026-03-23 20:59 [PATCH v2 0/5] Fuller TDX kexec support Vishal Verma
2026-03-23 20:59 ` [PATCH v2 1/5] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h> Vishal Verma
@ 2026-03-23 20:59 ` Vishal Verma
2026-03-24 10:03 ` Chao Gao
2026-03-23 20:59 ` [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE Vishal Verma
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Vishal Verma @ 2026-03-23 20:59 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini
Cc: linux-kernel, linux-coco, kvm, Kai Huang, Vishal Verma
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
KVM tries to take care of some required cache flushing earlier in the
kexec path in order to be kind to some long standing races that can occur
later in the operation. Until recently, VMXOFF was handled within KVM.
Since VMX being enabled is required to make a SEAMCALL, it had the best
per-cpu scoped operation to plug the flushing into. So it is kicked off
from there.
This early kexec cache flushing in KVM happens via a syscore shutdown
callback. Now that VMX enablement control has moved to arch/x86, which has
grown its own syscore shutdown callback, it no longer makes sense for it to
live in KVM. It fits better with the TDX enablement managing code.
In addition, future changes will add a SEAMCALL that happens immediately
before VMXOFF, which means the cache flush in KVM will be too late to
flush the cache before the last SEAMCALL. So move it to the newly added TDX
arch/x86 syscore shutdown handler.
Since tdx_cpu_flush_cache_for_kexec() is no longer needed by KVM, make it
static and remove the export. Since it is also not part of an operation
spread across disparate components, remove the redundant comments and
verbose naming.
In the existing KVM based code, CPU offline also funnels through
tdx_cpu_flush_cache_for_kexec(). So the centralization to the arch/x86
syscore shutdown callback elides this CPU offline time behavior. However,
WBINVD is already generally done at CPU offline as a matter of course. So
don't bother adding TDX specific logic for this, and rely on the normal
WBINVD to handle it.
Acked-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
arch/x86/include/asm/tdx.h | 6 ------
arch/x86/kvm/vmx/tdx.c | 10 ----------
arch/x86/virt/vmx/tdx/tdx.c | 39 ++++++++++++++++++++-------------------
3 files changed, 20 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 2917b3451491..7674fc530090 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -205,11 +205,5 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
#endif /* CONFIG_INTEL_TDX_HOST */
-#ifdef CONFIG_KEXEC_CORE
-void tdx_cpu_flush_cache_for_kexec(void);
-#else
-static inline void tdx_cpu_flush_cache_for_kexec(void) { }
-#endif
-
#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b7264b533feb..50a5cfdbd33e 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -440,16 +440,6 @@ void tdx_disable_virtualization_cpu(void)
tdx_flush_vp(&arg);
}
local_irq_restore(flags);
-
- /*
- * Flush cache now if kexec is possible: this is necessary to avoid
- * having dirty private memory cachelines when the new kernel boots,
- * but WBINVD is a relatively expensive operation and doing it during
- * kexec can exacerbate races in native_stop_other_cpus(). Do it
- * now, since this is a safe moment and there is going to be no more
- * TDX activity on this CPU from this point on.
- */
- tdx_cpu_flush_cache_for_kexec();
}
#define TDX_SEAMCALL_RETRIES 10000
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index cb9b3210ab71..0802d0fd18a4 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -224,8 +224,28 @@ static int tdx_offline_cpu(unsigned int cpu)
return 0;
}
+static void tdx_cpu_flush_cache(void)
+{
+ lockdep_assert_preemption_disabled();
+
+ if (!this_cpu_read(cache_state_incoherent))
+ return;
+
+ wbinvd();
+ this_cpu_write(cache_state_incoherent, false);
+}
+
static void tdx_shutdown_cpu(void *ign)
{
+ /*
+ * Flush cache now if kexec is possible: this is necessary to avoid
+ * having dirty private memory cachelines when the new kernel boots,
+ * but WBINVD is a relatively expensive operation and doing it during
+ * kexec can exacerbate races in native_stop_other_cpus(). Do it
+ * now, since this is a safe moment and there is going to be no more
+ * TDX activity on this CPU from this point on.
+ */
+ tdx_cpu_flush_cache();
x86_virt_put_ref(X86_FEATURE_VMX);
}
@@ -1920,22 +1940,3 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
-
-#ifdef CONFIG_KEXEC_CORE
-void tdx_cpu_flush_cache_for_kexec(void)
-{
- lockdep_assert_preemption_disabled();
-
- if (!this_cpu_read(cache_state_incoherent))
- return;
-
- /*
- * Private memory cachelines need to be clean at the time of
- * kexec. Write them back now, as the caller promises that
- * there should be no more SEAMCALLs on this CPU.
- */
- wbinvd();
- this_cpu_write(cache_state_incoherent, false);
-}
-EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec);
-#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v2 2/5] x86/virt/tdx: Pull kexec cache flush logic into arch/x86
2026-03-23 20:59 ` [PATCH v2 2/5] x86/virt/tdx: Pull kexec cache flush logic into arch/x86 Vishal Verma
@ 2026-03-24 10:03 ` Chao Gao
0 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2026-03-24 10:03 UTC (permalink / raw)
To: Vishal Verma
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini, linux-kernel, linux-coco, kvm,
Kai Huang
On Mon, Mar 23, 2026 at 02:59:05PM -0600, Vishal Verma wrote:
>From: Rick Edgecombe <rick.p.edgecombe@intel.com>
>
>KVM tries to take care of some required cache flushing earlier in the
>kexec path in order to be kind to some long standing races that can occur
>later in the operation. Until recently, VMXOFF was handled within KVM.
>Since VMX being enabled is required to make a SEAMCALL, it had the best
>per-cpu scoped operation to plug the flushing into. So it is kicked off
>from there.
>
>This early kexec cache flushing in KVM happens via a syscore shutdown
>callback. Now that VMX enablement control has moved to arch/x86, which has
>grown its own syscore shutdown callback, it no longer make sense for it to
>live in KVM. It fits better with the TDX enablement managing code.
>
>In addition, future changes will add a SEAMCALL that happens immediately
>before VMXOFF, which means the cache flush in KVM will be too late to
>flush the cache before the last SEAMCALL. So move it to the newly added TDX
>arch/x86 syscore shutdown handler.
>
>Since tdx_cpu_flush_cache_for_kexec() is no longer needed by KVM, make it
>static and remove the export. Since it is also not part of an operation
>spread across disparate components, remove the redundant comments and
>verbose naming.
>
>In the existing KVM based code, CPU offline also funnels through
>tdx_cpu_flush_cache_for_kexec(). So the centralization to the arch/x86
>syscore shutdown callback elides this CPU offline time behavior. However,
>WBINVD is already generally done at CPU offline as matter of course. So
>don't bother adding TDX specific logic for this, and rely on the normal
>WBINVD to handle it.
>
>Acked-by: Kai Huang <kai.huang@intel.com>
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE
2026-03-23 20:59 [PATCH v2 0/5] Fuller TDX kexec support Vishal Verma
2026-03-23 20:59 ` [PATCH v2 1/5] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h> Vishal Verma
2026-03-23 20:59 ` [PATCH v2 2/5] x86/virt/tdx: Pull kexec cache flush logic into arch/x86 Vishal Verma
@ 2026-03-23 20:59 ` Vishal Verma
2026-03-23 21:54 ` Verma, Vishal L
` (2 more replies)
2026-03-23 20:59 ` [PATCH v2 4/5] x86/tdx: Disable the TDX module during kexec and kdump Vishal Verma
2026-03-23 20:59 ` [PATCH v2 5/5] x86/virt/tdx: Remove kexec docs Vishal Verma
4 siblings, 3 replies; 13+ messages in thread
From: Vishal Verma @ 2026-03-23 20:59 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini
Cc: linux-kernel, linux-coco, kvm, Vishal Verma
Some early TDX-capable platforms have an erratum where a partial write
to TDX private memory can cause a machine check on a subsequent read.
On these platforms, kexec and kdump have been disabled,
because the old kernel cannot safely hand off TDX state to the new
kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
provides a way to cleanly disable TDX and allow kexec to proceed.
The new SEAMCALL has an enumeration bit, but that is ignored. It is
expected that users will be using the latest TDX module, and the failure
mode for running the missing SEAMCALL on an older module is not fatal.
This can be a long running operation, and the time needed largely
depends on the amount of memory that has been allocated to TDs. If all
TDs have been destroyed prior to the sys_disable call, then it is fast,
since only the TDX module memory needs to be overwritten.
After the SEAMCALL completes, the TDX module is disabled and all memory
resources allocated to TDX are freed and reset. The next kernel can then
re-initialize the TDX module from scratch via the normal TDX bring-up
sequence.
The SEAMCALL can return two different error codes that expect a retry.
- TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
interrupt. However, it will not return until it makes some forward
progress, so we can expect to complete even in the case of interrupt
storms.
- TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
block other SEAMCALLs once it gets going. So this contention will be
short lived.
So retry indefinitely on either of these error codes, until success or some
other error.
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
arch/x86/include/asm/shared/tdx_errno.h | 1 +
arch/x86/include/asm/tdx.h | 3 +++
arch/x86/virt/vmx/tdx/tdx.h | 1 +
arch/x86/virt/vmx/tdx/tdx.c | 28 ++++++++++++++++++++++++++++
4 files changed, 33 insertions(+)
diff --git a/arch/x86/include/asm/shared/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h
index 8bf6765cf082..246b4fd54a48 100644
--- a/arch/x86/include/asm/shared/tdx_errno.h
+++ b/arch/x86/include/asm/shared/tdx_errno.h
@@ -15,6 +15,7 @@
#define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE 0x6000000500000000ULL
#define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE 0x6000000700000000ULL
#define TDX_INTERRUPTED_RESUMABLE 0x8000000300000000ULL
+#define TDX_SYS_BUSY 0x8000020200000000ULL
#define TDX_OPERAND_INVALID 0xC000010000000000ULL
#define TDX_OPERAND_BUSY 0x8000020000000000ULL
#define TDX_PREVIOUS_TLB_EPOCH_BUSY 0x8000020100000000ULL
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 7674fc530090..a0a4a15142fc 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -172,6 +172,8 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
return level - 1;
}
+void tdx_sys_disable(void);
+
u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
@@ -203,6 +205,7 @@ static inline void tdx_init(void) { }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
+static inline void tdx_sys_disable(void) { }
#endif /* CONFIG_INTEL_TDX_HOST */
#endif /* !__ASSEMBLER__ */
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index dde219c823b4..e2cf2dd48755 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -46,6 +46,7 @@
#define TDH_PHYMEM_PAGE_WBINVD 41
#define TDH_VP_WR 43
#define TDH_SYS_CONFIG 45
+#define TDH_SYS_DISABLE 69
/*
* SEAMCALL leaf:
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 0802d0fd18a4..3a76000dec7a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -37,6 +37,7 @@
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
+#include <asm/shared/tdx_errno.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -1940,3 +1941,30 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
+
+void tdx_sys_disable(void)
+{
+ struct tdx_module_args args = {};
+ u64 ret;
+
+ /*
+ * This retry loop is unbounded, but it won't spin forever:
+ * - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
+ * calls.
+ * - TDX_SYS_BUSY could transiently contend with TDH.SYS.* SEAMCALLs,
+ * but will lock out future ones.
+ *
+ * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
+ * run after this until a full reinitialization is done.
+ */
+ do {
+ ret = seamcall(TDH_SYS_DISABLE, &args);
+ } while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
+
+ /*
+ * Print SEAMCALL failures, but not SW-defined error codes
+ * (SEAMCALL faulted with #GP/#UD, TDX not supported).
+ */
+ if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
+ pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
+}
--
2.53.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE
2026-03-23 20:59 ` [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE Vishal Verma
@ 2026-03-23 21:54 ` Verma, Vishal L
2026-03-23 22:40 ` Huang, Kai
2026-03-24 10:18 ` Chao Gao
2 siblings, 0 replies; 13+ messages in thread
From: Verma, Vishal L @ 2026-03-23 21:54 UTC (permalink / raw)
To: Edgecombe, Rick P, seanjc@google.com, bp@alien8.de,
x86@kernel.org, kas@kernel.org, hpa@zytor.com, mingo@redhat.com,
dave.hansen@linux.intel.com, tglx@kernel.org, pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-coco@lists.linux.dev,
linux-kernel@vger.kernel.org
On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
>
[..]
> +void tdx_sys_disable(void)
> +{
> + struct tdx_module_args args = {};
> + u64 ret;
> +
> + /*
> + * Don't loop forever.
> + * - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
> + * calls.
> + * - TDX_SYS_BUSY could transiently contend with TDH.SYS.* SEAMCALLs,
> + * but will lock out future ones.
> + *
> + * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
> + * run after this until a full reinitialization is done.
> + */
> + do {
> + ret = seamcall(TDH_SYS_DISABLE, &args);
> + } while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
> +
> + /*
> + * Print SEAMCALL failures, but not SW-defined error codes
> + * (SEAMCALL faulted with #GP/#UD, TDX not supported).
> + */
> + if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
> + pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
> +}
Note - old TDX modules that don't implement this SEAMCALL produce a
message like:
virt/tdx: TDH.SYS.DISABLE failed: 0xc000010000000000
Where that code translates to TDX_OPERAND_INVALID.
This also serves as a nudge that the module should be updated.
It might be worth including a blurb about this in the commit message -
something like below. This could be included when applying, or I can
send an updated version with this if it is acceptable.
---
An error is printed if the SEAMCALL fails with anything other than the
error codes that cause retries, or 'synthesized' error codes produced
for #GP or #UD. e.g., an old module that has been properly initialized,
that doesn't implement SYS_DISABLE, returns TDX_OPERAND_INVALID. This
prints:
virt/tdx: TDH.SYS.DISABLE failed: 0xc000010000000000
But a system that doesn't have any TDX support at all doesn't print
anything.
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE
2026-03-23 20:59 ` [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE Vishal Verma
2026-03-23 21:54 ` Verma, Vishal L
@ 2026-03-23 22:40 ` Huang, Kai
2026-03-24 10:18 ` Chao Gao
2 siblings, 0 replies; 13+ messages in thread
From: Huang, Kai @ 2026-03-23 22:40 UTC (permalink / raw)
To: Edgecombe, Rick P, seanjc@google.com, bp@alien8.de,
x86@kernel.org, kas@kernel.org, hpa@zytor.com, mingo@redhat.com,
Verma, Vishal L, dave.hansen@linux.intel.com, tglx@kernel.org,
pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-coco@lists.linux.dev,
linux-kernel@vger.kernel.org
On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
> Some early TDX-capable platforms have an erratum where a partial write
> to TDX private memory can cause a machine check on a subsequent read.
> On these platforms, kexec and kdump have been disabled in these cases,
> because the old kernel cannot safely hand off TDX state to the new
> kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
> provides a way to cleanly disable TDX and allow kexec to proceed.
>
> The new SEAMCALL has an enumeration bit, but that is ignored. It is
> expected that users will be using the latest TDX module, and the failure
> mode for running the missing SEAMCALL on an older module is not fatal.
>
> This can be a long running operation, and the time needed largely
> depends on the amount of memory that has been allocated to TDs. If all
> TDs have been destroyed prior to the sys_disable call, then it is fast,
> with only needing to override the TDX module memory.
>
> After the SEAMCALL completes, the TDX module is disabled and all memory
> resources allocated to TDX are freed and reset. The next kernel can then
> re-initialize the TDX module from scratch via the normal TDX bring-up
> sequence.
>
> The SEAMCALL can return two different error codes that expect a retry.
> - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
> interrupt. However, it will not return until it makes some forward
> progress, so we can expect to complete even in the case of interrupt
> storms.
> - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
> SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
> block other SEAMCALLs once it gets going. So this contention will be
> short lived.
>
> So loop infinitely on either of these error codes, until success or other
> error.
>
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>
Acked-by: Kai Huang <kai.huang@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE
2026-03-23 20:59 ` [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE Vishal Verma
2026-03-23 21:54 ` Verma, Vishal L
2026-03-23 22:40 ` Huang, Kai
@ 2026-03-24 10:18 ` Chao Gao
2 siblings, 0 replies; 13+ messages in thread
From: Chao Gao @ 2026-03-24 10:18 UTC (permalink / raw)
To: Vishal Verma
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini, linux-kernel, linux-coco, kvm
On Mon, Mar 23, 2026 at 02:59:06PM -0600, Vishal Verma wrote:
>Some early TDX-capable platforms have an erratum where a partial write
>to TDX private memory can cause a machine check on a subsequent read.
>On these platforms, kexec and kdump have been disabled in these cases,
>because the old kernel cannot safely hand off TDX state to the new
>kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
>provides a way to cleanly disable TDX and allow kexec to proceed.
>
>The new SEAMCALL has an enumeration bit, but that is ignored. It is
>expected that users will be using the latest TDX module, and the failure
>mode for running the missing SEAMCALL on an older module is not fatal.
>
>This can be a long running operation, and the time needed largely
>depends on the amount of memory that has been allocated to TDs. If all
>TDs have been destroyed prior to the sys_disable call, then it is fast,
>with only needing to override the TDX module memory.
>
>After the SEAMCALL completes, the TDX module is disabled and all memory
>resources allocated to TDX are freed and reset. The next kernel can then
>re-initialize the TDX module from scratch via the normal TDX bring-up
>sequence.
>
>The SEAMCALL can return two different error codes that expect a retry.
> - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
> interrupt. However, it will not return until it makes some forward
> progress, so we can expect to complete even in the case of interrupt
> storms.
> - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
> SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
> block other SEAMCALLs once it gets going. So this contention will be
> short lived.
>
>So loop infinitely on either of these error codes, until success or other
>error.
>
>Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 4/5] x86/tdx: Disable the TDX module during kexec and kdump
2026-03-23 20:59 [PATCH v2 0/5] Fuller TDX kexec support Vishal Verma
` (2 preceding siblings ...)
2026-03-23 20:59 ` [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE Vishal Verma
@ 2026-03-23 20:59 ` Vishal Verma
2026-03-23 22:41 ` Huang, Kai
2026-03-23 20:59 ` [PATCH v2 5/5] x86/virt/tdx: Remove kexec docs Vishal Verma
4 siblings, 1 reply; 13+ messages in thread
From: Vishal Verma @ 2026-03-23 20:59 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini
Cc: linux-kernel, linux-coco, kvm, Vishal Verma
Use the TDH.SYS.DISABLE SEAMCALL, which disables the TDX module,
reclaims all memory resources assigned to TDX, and clears any
partial-write induced poison, to allow kexec and kdump on platforms with
the partial write erratum.
On TDX-capable platforms with the partial write erratum, kexec has been
disabled because the new kernel could hit a machine check reading a
previously poisoned memory location.
Later TDX modules support TDH.SYS.DISABLE, which disables the module and
reclaims all TDX memory resources, allowing the new kernel to re-initialize
TDX from scratch. This operation also clears the old memory, cleaning up
any poison.
Add tdx_sys_disable() to tdx_shutdown(), which is called in the
syscore_shutdown path for kexec. This is done just before tdx_shutdown()
disables VMX on all CPUs.
For kdump, call tdx_sys_disable() in the crash path before
x86_virt_emergency_disable_virtualization_cpu() does VMXOFF.
Since this clears any poison on TDX-managed memory, remove the
X86_BUG_TDX_PW_MCE check in machine_kexec() that blocked kexec on
partial write erratum platforms.
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
arch/x86/kernel/crash.c | 2 ++
arch/x86/kernel/machine_kexec_64.c | 16 ----------------
arch/x86/virt/vmx/tdx/tdx.c | 1 +
3 files changed, 3 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index cd796818d94d..623d4474631a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -38,6 +38,7 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
+#include <asm/tdx.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -112,6 +113,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
+ tdx_sys_disable();
x86_virt_emergency_disable_virtualization_cpu();
/*
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 0590d399d4f1..c3f4a389992d 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -347,22 +347,6 @@ int machine_kexec_prepare(struct kimage *image)
unsigned long reloc_end = (unsigned long)__relocate_kernel_end;
int result;
- /*
- * Some early TDX-capable platforms have an erratum. A kernel
- * partial write (a write transaction of less than cacheline
- * lands at memory controller) to TDX private memory poisons that
- * memory, and a subsequent read triggers a machine check.
- *
- * On those platforms the old kernel must reset TDX private
- * memory before jumping to the new kernel otherwise the new
- * kernel may see unexpected machine check. For simplicity
- * just fail kexec/kdump on those platforms.
- */
- if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) {
- pr_info_once("Not allowed on platform with tdx_pw_mce bug\n");
- return -EOPNOTSUPP;
- }
-
/* Setup the identity mapped 64bit page table */
result = init_pgtable(image, __pa(control_page));
if (result)
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 3a76000dec7a..aaf22a87717a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -252,6 +252,7 @@ static void tdx_shutdown_cpu(void *ign)
static void tdx_shutdown(void *ign)
{
+ tdx_sys_disable();
on_each_cpu(tdx_shutdown_cpu, NULL, 1);
}
--
2.53.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 4/5] x86/tdx: Disable the TDX module during kexec and kdump
2026-03-23 20:59 ` [PATCH v2 4/5] x86/tdx: Disable the TDX module during kexec and kdump Vishal Verma
@ 2026-03-23 22:41 ` Huang, Kai
0 siblings, 0 replies; 13+ messages in thread
From: Huang, Kai @ 2026-03-23 22:41 UTC (permalink / raw)
To: Edgecombe, Rick P, seanjc@google.com, bp@alien8.de,
x86@kernel.org, kas@kernel.org, hpa@zytor.com, mingo@redhat.com,
Verma, Vishal L, dave.hansen@linux.intel.com, tglx@kernel.org,
pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-coco@lists.linux.dev,
linux-kernel@vger.kernel.org
On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
> Use the TDH.SYS.DISABLE SEAMCALL, which disables the TDX module,
> reclaims all memory resources assigned to TDX, and clears any
> partial-write induced poison, to allow kexec and kdump on platforms with
> the partial write errata.
>
> On TDX-capable platforms with the partial write erratum, kexec has been
> disabled because the new kernel could hit a machine check reading a
> previously poisoned memory location.
>
> Later TDX modules support TDH.SYS.DISABLE, which disables the module and
> reclaims all TDX memory resources, allowing the new kernel to re-initialize
> TDX from scratch. This operation also clears the old memory, cleaning up
> any poison.
>
> Add tdx_sys_disable() to tdx_shutdown(), which is called in the
> syscore_shutdown path for kexec. This is done just before tdx_shutdown()
> disables VMX on all CPUs.
>
> For kdump, call tdx_sys_disable() in the crash path before
> x86_virt_emergency_disable_virtualization_cpu() does VMXOFF.
>
> Since this clears any poison on TDX-managed memory, remove the
> X86_BUG_TDX_PW_MCE check in machine_kexec() that blocked kexec on
> partial write errata platforms.
>
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>
Acked-by: Kai Huang <kai.huang@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 5/5] x86/virt/tdx: Remove kexec docs
2026-03-23 20:59 [PATCH v2 0/5] Fuller TDX kexec support Vishal Verma
` (3 preceding siblings ...)
2026-03-23 20:59 ` [PATCH v2 4/5] x86/tdx: Disable the TDX module during kexec and kdump Vishal Verma
@ 2026-03-23 20:59 ` Vishal Verma
2026-03-23 22:41 ` Huang, Kai
4 siblings, 1 reply; 13+ messages in thread
From: Vishal Verma @ 2026-03-23 20:59 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Kiryl Shutsemau, Rick Edgecombe,
Sean Christopherson, Paolo Bonzini
Cc: linux-kernel, linux-coco, kvm, Vishal Verma
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
Recent changes have removed the hard limitations for using kexec and
TDX together. So remove the section in the TDX docs.
Users on platforms with the partial write erratum will need an updated
TDX module to handle the rare edge cases. The docs do not currently
provide any guidance on recommended TDX module versions, so don't keep a
whole section around to document this interaction.
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
Documentation/arch/x86/tdx.rst | 7 -------
1 file changed, 7 deletions(-)
diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst
index ff6b110291bc..1a3b5bac1021 100644
--- a/Documentation/arch/x86/tdx.rst
+++ b/Documentation/arch/x86/tdx.rst
@@ -138,13 +138,6 @@ If the platform has such erratum, the kernel prints additional message in
machine check handler to tell user the machine check may be caused by
kernel bug on TDX private memory.
-Kexec
-~~~~~~~
-
-Currently kexec doesn't work on the TDX platforms with the aforementioned
-erratum. It fails when loading the kexec kernel image. Otherwise it
-works normally.
-
Interaction vs S3 and deeper states
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
2.53.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 5/5] x86/virt/tdx: Remove kexec docs
2026-03-23 20:59 ` [PATCH v2 5/5] x86/virt/tdx: Remove kexec docs Vishal Verma
@ 2026-03-23 22:41 ` Huang, Kai
0 siblings, 0 replies; 13+ messages in thread
From: Huang, Kai @ 2026-03-23 22:41 UTC (permalink / raw)
To: Edgecombe, Rick P, seanjc@google.com, bp@alien8.de,
x86@kernel.org, kas@kernel.org, hpa@zytor.com, mingo@redhat.com,
Verma, Vishal L, dave.hansen@linux.intel.com, tglx@kernel.org,
pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-coco@lists.linux.dev,
linux-kernel@vger.kernel.org
On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
> From: Rick Edgecombe <rick.p.edgecombe@intel.com>
>
> Recent changes have removed the hard limitations for using kexec and
> TDX together. So remove the section in the TDX docs.
>
> Users on partial write erratums will need an updated TDX module to
> handle the rare edge cases. The docs do not currently provide any
> guidance on recommended TDX module versions, so don't keep a whole
> section around to document this interaction.
>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>
Acked-by: Kai Huang <kai.huang@intel.com>
^ permalink raw reply [flat|nested] 13+ messages in thread