* [PATCH v2 1/6] KVM: x86: Add dedicated storage for guest RIP
2026-04-09 22:42 [PATCH v2 0/6] KVM: x86: Reg cleanups / prep work for APX Sean Christopherson
@ 2026-04-09 22:42 ` Sean Christopherson
2026-04-09 22:42 ` [PATCH v2 2/6] KVM: x86: Drop the "EX" part of "EXREG" to avoid collision with APX Sean Christopherson
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Sean Christopherson @ 2026-04-09 22:42 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini, Kiryl Shutsemau
Cc: kvm, x86, linux-coco, linux-kernel, Chang S . Bae
Add kvm_vcpu_arch.rip to track guest RIP instead of including it in the
generic regs[] array. Decoupling RIP from regs[] will allow using a
*completely* arbitrary index for RIP, as opposed to the mostly-arbitrary
index that is currently used. That in turn will allow using indices
16-31 to track R16-R31 that are coming with APX.
Note, although RIP can be used for addressing, it does NOT have an
architecturally defined index, and so can't be reached via flows like
get_vmx_mem_address() where KVM "blindly" reads a general purpose register
given the SIB information reported by hardware. For RIP-relative
addressing, hardware reports the full "offset" in vmcs.EXIT_QUALIFICATION.
Note #2, keep the available/dirty tracking as RSP is context switched
through the VMCS, i.e. needs to be cached for VMX.
Opportunistically rename NR_VCPU_REGS to NR_VCPU_GENERAL_PURPOSE_REGS to
better capture what it tracks, and so that KVM can slot in R16-R31 without
running into weirdness where KVM's definition of "EXREG" doesn't line up
with APX's definition of "extended reg".
No functional change intended.
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 10 ++++++----
arch/x86/kvm/kvm_cache_regs.h | 12 ++++++++----
arch/x86/kvm/svm/sev.c | 2 +-
arch/x86/kvm/svm/svm.c | 6 +++---
arch/x86/kvm/vmx/vmx.c | 8 ++++----
arch/x86/kvm/vmx/vmx.h | 2 +-
6 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c470e40a00aa..68a11325e8bc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -191,10 +191,11 @@ enum kvm_reg {
VCPU_REGS_R14 = __VCPU_REGS_R14,
VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
- VCPU_REGS_RIP,
- NR_VCPU_REGS,
+ NR_VCPU_GENERAL_PURPOSE_REGS,
- VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_REG_RIP = NR_VCPU_GENERAL_PURPOSE_REGS,
+
+ VCPU_EXREG_PDPTR,
VCPU_EXREG_CR0,
/*
* Alias AMD's ERAPS (not a real register) to CR3 so that common code
@@ -799,7 +800,8 @@ struct kvm_vcpu_arch {
* rip and regs accesses must go through
* kvm_{register,rip}_{read,write} functions.
*/
- unsigned long regs[NR_VCPU_REGS];
+ unsigned long regs[NR_VCPU_GENERAL_PURPOSE_REGS];
+ unsigned long rip;
u32 regs_avail;
u32 regs_dirty;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 8ddb01191d6f..9b7df9de0e87 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -112,7 +112,7 @@ static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu
*/
static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg)
{
- if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS))
return 0;
if (!kvm_register_is_available(vcpu, reg))
@@ -124,7 +124,7 @@ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg
static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg,
unsigned long val)
{
- if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS))
return;
vcpu->arch.regs[reg] = val;
@@ -133,12 +133,16 @@ static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg,
static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
- return kvm_register_read_raw(vcpu, VCPU_REGS_RIP);
+ if (!kvm_register_is_available(vcpu, VCPU_REG_RIP))
+ kvm_x86_call(cache_reg)(vcpu, VCPU_REG_RIP);
+
+ return vcpu->arch.rip;
}
static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
{
- kvm_register_write_raw(vcpu, VCPU_REGS_RIP, val);
+ vcpu->arch.rip = val;
+ kvm_register_mark_dirty(vcpu, VCPU_REG_RIP);
}
static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75d0c03d69bc..2010b157e288 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -967,7 +967,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
#endif
- save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
+ save->rip = svm->vcpu.arch.rip;
/* Sync some non-GPR registers before encrypting */
save->xcr0 = svm->vcpu.arch.xcr0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e7fdd7a9c280..85edaee27b03 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4420,7 +4420,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+ svm->vmcb->save.rip = vcpu->arch.rip;
/*
* Disable singlestep if we're injecting an interrupt/exception.
@@ -4506,7 +4506,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
- vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+ vcpu->arch.rip = svm->vmcb->save.rip;
}
vcpu->arch.regs_dirty = 0;
@@ -4946,7 +4946,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+ svm->vmcb->save.rip = vcpu->arch.rip;
nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a29896a9ef14..577b0c6286ad 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2604,8 +2604,8 @@ void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
case VCPU_REGS_RSP:
vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
break;
- case VCPU_REGS_RIP:
- vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
+ case VCPU_REG_RIP:
+ vcpu->arch.rip = vmcs_readl(GUEST_RIP);
break;
case VCPU_EXREG_PDPTR:
if (enable_ept)
@@ -7536,8 +7536,8 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
- vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ if (kvm_register_is_dirty(vcpu, VCPU_REG_RIP))
+ vmcs_writel(GUEST_RIP, vcpu->arch.rip);
vcpu->arch.regs_dirty = 0;
if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index db84e8001da5..d0cc5f6c6879 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -620,7 +620,7 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
* cache on demand. Other registers not listed here are synced to
* the cache immediately after VM-Exit.
*/
-#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) | \
+#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REG_RIP) | \
(1 << VCPU_REGS_RSP) | \
(1 << VCPU_EXREG_RFLAGS) | \
(1 << VCPU_EXREG_PDPTR) | \
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH v2 2/6] KVM: x86: Drop the "EX" part of "EXREG" to avoid collision with APX
2026-04-09 22:42 [PATCH v2 0/6] KVM: x86: Reg cleanups / prep work for APX Sean Christopherson
2026-04-09 22:42 ` [PATCH v2 1/6] KVM: x86: Add dedicated storage for guest RIP Sean Christopherson
@ 2026-04-09 22:42 ` Sean Christopherson
2026-04-09 22:42 ` [PATCH v2 3/6] KVM: nVMX: Do a bitwise-AND of regs_avail when switching active VMCS Sean Christopherson
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Sean Christopherson @ 2026-04-09 22:42 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini, Kiryl Shutsemau
Cc: kvm, x86, linux-coco, linux-kernel, Chang S . Bae
Now that NR_VCPU_REGS is no longer a thing, and now that RIP is
effectively an EXREG, drop the "EX" (for "extended", or maybe "extra"?)
prefix from non-GPR registers to avoid a collision with APX (Advanced
Performance Extensions), which adds:
16 additional general-purpose registers (GPRs) R16–R31, also referred
to as Extended GPRs (EGPRs) in this document;
I.e. KVM's version of "extended" won't match with APX's definition.
No functional change intended.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 18 +++++++--------
arch/x86/kvm/kvm_cache_regs.h | 16 ++++++-------
arch/x86/kvm/svm/svm.c | 6 ++---
arch/x86/kvm/svm/svm.h | 2 +-
arch/x86/kvm/vmx/nested.c | 6 ++---
arch/x86/kvm/vmx/tdx.c | 4 ++--
arch/x86/kvm/vmx/vmx.c | 40 ++++++++++++++++-----------------
arch/x86/kvm/vmx/vmx.h | 20 ++++++++---------
arch/x86/kvm/x86.c | 16 ++++++-------
9 files changed, 64 insertions(+), 64 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 68a11325e8bc..b1eae1e7b04f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -195,8 +195,8 @@ enum kvm_reg {
VCPU_REG_RIP = NR_VCPU_GENERAL_PURPOSE_REGS,
- VCPU_EXREG_PDPTR,
- VCPU_EXREG_CR0,
+ VCPU_REG_PDPTR,
+ VCPU_REG_CR0,
/*
* Alias AMD's ERAPS (not a real register) to CR3 so that common code
* can trigger emulation of the RAP (Return Address Predictor) with
@@ -204,13 +204,13 @@ enum kvm_reg {
* is cleared on writes to CR3, i.e. marking CR3 dirty will naturally
* mark ERAPS dirty as well.
*/
- VCPU_EXREG_CR3,
- VCPU_EXREG_ERAPS = VCPU_EXREG_CR3,
- VCPU_EXREG_CR4,
- VCPU_EXREG_RFLAGS,
- VCPU_EXREG_SEGMENTS,
- VCPU_EXREG_EXIT_INFO_1,
- VCPU_EXREG_EXIT_INFO_2,
+ VCPU_REG_CR3,
+ VCPU_REG_ERAPS = VCPU_REG_CR3,
+ VCPU_REG_CR4,
+ VCPU_REG_RFLAGS,
+ VCPU_REG_SEGMENTS,
+ VCPU_REG_EXIT_INFO_1,
+ VCPU_REG_EXIT_INFO_2,
};
enum {
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 9b7df9de0e87..ac1f9867a234 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -159,8 +159,8 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
might_sleep(); /* on svm */
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
- kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_PDPTR);
+ if (!kvm_register_is_available(vcpu, VCPU_REG_PDPTR))
+ kvm_x86_call(cache_reg)(vcpu, VCPU_REG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
@@ -174,8 +174,8 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
if ((tmask & vcpu->arch.cr0_guest_owned_bits) &&
- !kvm_register_is_available(vcpu, VCPU_EXREG_CR0))
- kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR0);
+ !kvm_register_is_available(vcpu, VCPU_REG_CR0))
+ kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR0);
return vcpu->arch.cr0 & mask;
}
@@ -196,8 +196,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
if ((tmask & vcpu->arch.cr4_guest_owned_bits) &&
- !kvm_register_is_available(vcpu, VCPU_EXREG_CR4))
- kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR4);
+ !kvm_register_is_available(vcpu, VCPU_REG_CR4))
+ kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR4);
return vcpu->arch.cr4 & mask;
}
@@ -211,8 +211,8 @@ static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu,
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
- kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR3);
+ if (!kvm_register_is_available(vcpu, VCPU_REG_CR3))
+ kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR3);
return vcpu->arch.cr3;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 85edaee27b03..ee5749d8b3e8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1517,7 +1517,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
kvm_register_mark_available(vcpu, reg);
switch (reg) {
- case VCPU_EXREG_PDPTR:
+ case VCPU_REG_PDPTR:
/*
* When !npt_enabled, mmu->pdptrs[] is already available since
* it is always updated per SDM when moving to CRs.
@@ -4179,7 +4179,7 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_ERAPS);
svm_flush_tlb_asid(vcpu);
}
@@ -4457,7 +4457,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS) &&
- kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
+ kvm_register_is_dirty(vcpu, VCPU_REG_ERAPS))
svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
svm_fixup_nested_rips(vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index fd0652b32c81..677d268ae9c7 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -474,7 +474,7 @@ static inline bool svm_is_vmrun_failure(u64 exit_code)
* KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
* is changed. svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
*/
-#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
+#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_REG_PDPTR)
static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
{
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 3fe88f29be7a..22b1f06a9d40 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1189,7 +1189,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
}
vcpu->arch.cr3 = cr3;
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
kvm_init_mmu(vcpu);
@@ -4972,7 +4972,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
nested_ept_uninit_mmu_context(vcpu);
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_available(vcpu, VCPU_REG_CR3);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -5074,7 +5074,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_service_local_tlb_flush_requests(vcpu);
/*
- * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
+ * VCPU_REG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
* now and the new vmentry. Ensure that the VMCS02 PDPTR fields are
* up-to-date before switching to L1.
*/
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 1e47c194af53..c23ec4ac8bc8 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1013,8 +1013,8 @@ static fastpath_t tdx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-#define TDX_REGS_AVAIL_SET (BIT_ULL(VCPU_EXREG_EXIT_INFO_1) | \
- BIT_ULL(VCPU_EXREG_EXIT_INFO_2) | \
+#define TDX_REGS_AVAIL_SET (BIT_ULL(VCPU_REG_EXIT_INFO_1) | \
+ BIT_ULL(VCPU_REG_EXIT_INFO_2) | \
BIT_ULL(VCPU_REGS_RAX) | \
BIT_ULL(VCPU_REGS_RBX) | \
BIT_ULL(VCPU_REGS_RCX) | \
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 577b0c6286ad..aa1c26018439 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -843,8 +843,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
bool ret;
u32 mask = 1 << (seg * SEG_FIELD_NR + field);
- if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
- kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
+ if (!kvm_register_is_available(&vmx->vcpu, VCPU_REG_SEGMENTS)) {
+ kvm_register_mark_available(&vmx->vcpu, VCPU_REG_SEGMENTS);
vmx->segment_cache.bitmask = 0;
}
ret = vmx->segment_cache.bitmask & mask;
@@ -1609,8 +1609,8 @@ unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long rflags, save_rflags;
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
- kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+ if (!kvm_register_is_available(vcpu, VCPU_REG_RFLAGS)) {
+ kvm_register_mark_available(vcpu, VCPU_REG_RFLAGS);
rflags = vmcs_readl(GUEST_RFLAGS);
if (vmx->rmode.vm86_active) {
rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -1633,7 +1633,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
* if L1 runs L2 as a restricted guest.
*/
if (is_unrestricted_guest(vcpu)) {
- kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+ kvm_register_mark_available(vcpu, VCPU_REG_RFLAGS);
vmx->rflags = rflags;
vmcs_writel(GUEST_RFLAGS, rflags);
return;
@@ -2607,17 +2607,17 @@ void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
case VCPU_REG_RIP:
vcpu->arch.rip = vmcs_readl(GUEST_RIP);
break;
- case VCPU_EXREG_PDPTR:
+ case VCPU_REG_PDPTR:
if (enable_ept)
ept_save_pdptrs(vcpu);
break;
- case VCPU_EXREG_CR0:
+ case VCPU_REG_CR0:
guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
vcpu->arch.cr0 &= ~guest_owned_bits;
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
break;
- case VCPU_EXREG_CR3:
+ case VCPU_REG_CR3:
/*
* When intercepting CR3 loads, e.g. for shadowing paging, KVM's
* CR3 is loaded into hardware, not the guest's CR3.
@@ -2625,7 +2625,7 @@ void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
break;
- case VCPU_EXREG_CR4:
+ case VCPU_REG_CR4:
guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
vcpu->arch.cr4 &= ~guest_owned_bits;
@@ -3350,7 +3350,7 @@ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
+ if (!kvm_register_is_dirty(vcpu, VCPU_REG_PDPTR))
return;
if (is_pae_paging(vcpu)) {
@@ -3373,7 +3373,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
- kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
+ kvm_register_mark_available(vcpu, VCPU_REG_PDPTR);
}
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
@@ -3416,7 +3416,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
+ kvm_register_mark_available(vcpu, VCPU_REG_CR0);
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME) {
@@ -3434,8 +3434,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* (correctly) stop reading vmcs.GUEST_CR3 because it thinks
* KVM's CR3 is installed.
*/
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
- vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
+ if (!kvm_register_is_available(vcpu, VCPU_REG_CR3))
+ vmx_cache_reg(vcpu, VCPU_REG_CR3);
/*
* When running with EPT but not unrestricted guest, KVM must
@@ -3472,7 +3472,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* GUEST_CR3 is still vmx->ept_identity_map_addr if EPT + !URG.
*/
if (!(old_cr0_pg & X86_CR0_PG) && (cr0 & X86_CR0_PG))
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
}
/* depends on vcpu->arch.cr0 to be set to a new value */
@@ -3501,7 +3501,7 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
- else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3))
+ else if (kvm_register_is_dirty(vcpu, VCPU_REG_CR3))
guest_cr3 = vcpu->arch.cr3;
else /* vmcs.GUEST_CR3 is already up-to-date. */
update_guest_cr3 = false;
@@ -3561,7 +3561,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
vcpu->arch.cr4 = cr4;
- kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
+ kvm_register_mark_available(vcpu, VCPU_REG_CR4);
if (!enable_unrestricted_guest) {
if (enable_ept) {
@@ -5021,7 +5021,7 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
vmx_segment_cache_clear(vmx);
- kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
+ kvm_register_mark_available(vcpu, VCPU_REG_SEGMENTS);
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
@@ -7514,9 +7514,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
vmx->vt.exit_reason.full = EXIT_REASON_INVALID_STATE;
vmx->vt.exit_reason.failed_vmentry = 1;
- kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
+ kvm_register_mark_available(vcpu, VCPU_REG_EXIT_INFO_1);
vmx->vt.exit_qualification = ENTRY_FAIL_DEFAULT;
- kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
+ kvm_register_mark_available(vcpu, VCPU_REG_EXIT_INFO_2);
vmx->vt.exit_intr_info = 0;
return EXIT_FASTPATH_NONE;
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d0cc5f6c6879..9fb76ea48caf 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -317,7 +317,7 @@ static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
{
struct vcpu_vt *vt = to_vt(vcpu);
- if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) &&
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_REG_EXIT_INFO_1) &&
!WARN_ON_ONCE(is_td_vcpu(vcpu)))
vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -328,7 +328,7 @@ static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
{
struct vcpu_vt *vt = to_vt(vcpu);
- if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) &&
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_REG_EXIT_INFO_2) &&
!WARN_ON_ONCE(is_td_vcpu(vcpu)))
vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -622,14 +622,14 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
*/
#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REG_RIP) | \
(1 << VCPU_REGS_RSP) | \
- (1 << VCPU_EXREG_RFLAGS) | \
- (1 << VCPU_EXREG_PDPTR) | \
- (1 << VCPU_EXREG_SEGMENTS) | \
- (1 << VCPU_EXREG_CR0) | \
- (1 << VCPU_EXREG_CR3) | \
- (1 << VCPU_EXREG_CR4) | \
- (1 << VCPU_EXREG_EXIT_INFO_1) | \
- (1 << VCPU_EXREG_EXIT_INFO_2))
+ (1 << VCPU_REG_RFLAGS) | \
+ (1 << VCPU_REG_PDPTR) | \
+ (1 << VCPU_REG_SEGMENTS) | \
+ (1 << VCPU_REG_CR0) | \
+ (1 << VCPU_REG_CR3) | \
+ (1 << VCPU_REG_CR4) | \
+ (1 << VCPU_REG_EXIT_INFO_1) | \
+ (1 << VCPU_REG_EXIT_INFO_2))
static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..ac05cc289b56 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1090,14 +1090,14 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
}
/*
- * Marking VCPU_EXREG_PDPTR dirty doesn't work for !tdp_enabled.
+ * Marking VCPU_REG_PDPTR dirty doesn't work for !tdp_enabled.
* Shadow page roots need to be reconstructed instead.
*/
if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_PDPTR);
kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
vcpu->arch.pdptrs_from_userspace = false;
@@ -1478,7 +1478,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
/* Do not call post_set_cr3, we do not get here for confidential guests. */
handle_tlb_flush:
@@ -12473,7 +12473,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
vcpu->arch.cr2 = sregs->cr2;
*mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
kvm_x86_call(post_set_cr3)(vcpu, sregs->cr3);
kvm_set_cr8(vcpu, sregs->cr8);
@@ -12566,7 +12566,7 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
for (i = 0; i < 4 ; i++)
kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_PDPTR);
mmu_reset_needed = 1;
vcpu->arch.pdptrs_from_userspace = true;
}
@@ -13111,7 +13111,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_rip_write(vcpu, 0xfff0);
vcpu->arch.cr3 = 0;
- kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+ kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
@@ -14323,7 +14323,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
* the RAP (Return Address Predicator).
*/
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
- kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS);
+ kvm_register_is_dirty(vcpu, VCPU_REG_ERAPS);
kvm_invalidate_pcid(vcpu, operand.pcid);
return kvm_skip_emulated_instruction(vcpu);
@@ -14339,7 +14339,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
/*
- * Don't bother marking VCPU_EXREG_ERAPS dirty, SVM will take
+ * Don't bother marking VCPU_REG_ERAPS dirty, SVM will take
* care of doing so when emulating the full guest TLB flush
* (the RAP is cleared on all implicit TLB flushes).
*/
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH v2 6/6] KVM: x86: Use a proper bitmap for tracking available/dirty registers
2026-04-09 22:42 [PATCH v2 0/6] KVM: x86: Reg cleanups / prep work for APX Sean Christopherson
` (4 preceding siblings ...)
2026-04-09 22:42 ` [PATCH v2 5/6] KVM: x86: Track available/dirty register masks as "unsigned long" values Sean Christopherson
@ 2026-04-09 22:42 ` Sean Christopherson
5 siblings, 0 replies; 7+ messages in thread
From: Sean Christopherson @ 2026-04-09 22:42 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini, Kiryl Shutsemau
Cc: kvm, x86, linux-coco, linux-kernel, Chang S . Bae
Define regs_{avail,dirty} as bitmaps instead of U32s to harden against
overflow, and to allow for dynamically sizing the bitmaps when APX comes
along, which will add 16 more GPRs (R16-R31) and thus increase the total
number of registers beyond 32.
Open code writes in the "reset" APIs, as the writes are hot paths and
bitmap_write() is complete overkill for what KVM needs. Even better,
hardcoding writes to entry '0' in the array is a perfect excuse to assert
that the array contains exactly one entry, e.g. to effectively add guard
against defining R16-R31 in 32-bit kernels.
For all intents and purposes, no functional change intended even though
using bitmap_fill() will mean "undefined" registers are no longer marked
available and dirty (KVM should never be querying those bits).
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 6 ++++--
arch/x86/kvm/kvm_cache_regs.h | 20 ++++++++++++--------
arch/x86/kvm/x86.c | 4 ++--
3 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c47eb294c066..ef0c368676c5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -211,6 +211,8 @@ enum kvm_reg {
VCPU_REG_SEGMENTS,
VCPU_REG_EXIT_INFO_1,
VCPU_REG_EXIT_INFO_2,
+
+ NR_VCPU_TOTAL_REGS,
};
enum {
@@ -802,8 +804,8 @@ struct kvm_vcpu_arch {
*/
unsigned long regs[NR_VCPU_GENERAL_PURPOSE_REGS];
unsigned long rip;
- unsigned long regs_avail;
- unsigned long regs_dirty;
+ DECLARE_BITMAP(regs_avail, NR_VCPU_TOTAL_REGS);
+ DECLARE_BITMAP(regs_dirty, NR_VCPU_TOTAL_REGS);
unsigned long cr0;
unsigned long cr0_guest_owned_bits;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 171e6bc2e169..2ae492ad6412 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -67,29 +67,29 @@ static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
kvm_assert_register_caching_allowed(vcpu);
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ return test_bit(reg, vcpu->arch.regs_avail);
}
static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
kvm_assert_register_caching_allowed(vcpu);
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+ return test_bit(reg, vcpu->arch.regs_dirty);
}
static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
kvm_assert_register_caching_allowed(vcpu);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, vcpu->arch.regs_avail);
}
static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
kvm_assert_register_caching_allowed(vcpu);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+ __set_bit(reg, vcpu->arch.regs_avail);
+ __set_bit(reg, vcpu->arch.regs_dirty);
}
/*
@@ -102,12 +102,15 @@ static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu
enum kvm_reg reg)
{
kvm_assert_register_caching_allowed(vcpu);
- return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ return arch___test_and_set_bit(reg, vcpu->arch.regs_avail);
}
static __always_inline void kvm_clear_available_registers(struct kvm_vcpu *vcpu,
unsigned long clear_mask)
{
+ BUILD_BUG_ON(sizeof(clear_mask) != sizeof(vcpu->arch.regs_avail[0]));
+ BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.regs_avail) != 1);
+
/*
* Note the bitwise-AND! In practice, a straight write would also work
* as KVM initializes the mask to all ones and never clears registers
@@ -115,12 +118,13 @@ static __always_inline void kvm_clear_available_registers(struct kvm_vcpu *vcpu,
* sanity checking as incorrectly marking an eagerly sync'd register
* unavailable will generate a WARN due to an unexpected cache request.
*/
- vcpu->arch.regs_avail &= ~clear_mask;
+ vcpu->arch.regs_avail[0] &= ~clear_mask;
}
static __always_inline void kvm_reset_dirty_registers(struct kvm_vcpu *vcpu)
{
- vcpu->arch.regs_dirty = 0;
+ BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.regs_dirty) != 1);
+ vcpu->arch.regs_dirty[0] = 0;
}
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ac05cc289b56..b8a91feec8e1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12836,8 +12836,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
int r;
vcpu->arch.last_vmentry_cpu = -1;
- vcpu->arch.regs_avail = ~0;
- vcpu->arch.regs_dirty = ~0;
+ bitmap_fill(vcpu->arch.regs_avail, NR_VCPU_TOTAL_REGS);
+ bitmap_fill(vcpu->arch.regs_dirty, NR_VCPU_TOTAL_REGS);
kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread