* [PATCH 0/3] KVM: Macrofy GPR swapping in assembly code
@ 2026-05-13 17:49 Paolo Bonzini
2026-05-13 17:49 ` [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() Paolo Bonzini
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Paolo Bonzini @ 2026-05-13 17:49 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: chang.seok.bae
These are Chang's patches from the APX series, with one small change:
the __VCPU_REGS_* constants are kept, to avoid relying on the ordering
of the constants in kvm_host.h.
While the kvm_vcpu_regs.h changes balloon the line count a bit,
the actual assembly code loses almost a hundred lines, and
APX will make the savings even larger.
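
As a rough sketch of what the conversion looks like (the operands here
are taken from the VMX patch; this is only an illustration, not the
full hunk), an open-coded sequence such as

    mov VCPU_RAX(%_ASM_DI), %_ASM_AX
    mov VCPU_RCX(%_ASM_DI), %_ASM_CX
    mov VCPU_RDX(%_ASM_DI), %_ASM_DX

collapses into a single invocation that derives the same offsets from
the __VCPU_REGS_* constants at assembly time:

    LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, %_ASM_AX, %_ASM_CX, %_ASM_DX
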
Paolo
Chang S. Bae (3):
KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run()
KVM: SVM: Macrofy GPR swapping in __svm_vcpu_run()
KVM: SEV: Macrofy GPR swapping in __svm_sev_es_vcpu_run()
arch/x86/include/asm/kvm_host.h | 16 ++--
arch/x86/include/asm/kvm_vcpu_regs.h | 127 +++++++++++++++++++++++++--
arch/x86/kvm/svm/vmenter.S | 107 +++++-----------------
arch/x86/kvm/vmenter.h | 43 +++++++++
arch/x86/kvm/vmx/vmenter.S | 89 +++++--------------
5 files changed, 211 insertions(+), 171 deletions(-)
--
2.52.0
^ permalink raw reply [flat|nested] 5+ messages in thread* [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() 2026-05-13 17:49 [PATCH 0/3] KVM: Macrofy GPR swapping in assembly code Paolo Bonzini @ 2026-05-13 17:49 ` Paolo Bonzini 2026-05-14 19:30 ` Sean Christopherson 2026-05-13 17:49 ` [PATCH 2/3] KVM: SVM: Macrofy GPR swapping in __svm_vcpu_run() Paolo Bonzini 2026-05-13 17:49 ` [PATCH 3/3] KVM: SEV: Macrofy GPR swapping in __svm_sev_es_vcpu_run() Paolo Bonzini 2 siblings, 1 reply; 5+ messages in thread From: Paolo Bonzini @ 2026-05-13 17:49 UTC (permalink / raw) To: linux-kernel, kvm; +Cc: chang.seok.bae, x86 From: "Chang S. Bae" <chang.seok.bae@intel.com> Convert the repeated register save/restore sequences into macros to simplify the VM entry code. This can also make it easier to extend for additional registers. No functional change intended. Suggested-by: Paolo Bonzini <pbonzini@redhat.com> Cc: x86@kernel.org Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com> Link: https://lore.kernel.org/6e67df0e-e5f0-43f5-aa86-22e8b01b75d2@redhat.com Message-ID: <20260512011502.53072-2-chang.seok.bae@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/include/asm/kvm_vcpu_regs.h | 122 +++++++++++++++++++++++++++ arch/x86/kvm/svm/vmenter.S | 3 - arch/x86/kvm/vmenter.h | 43 ++++++++++ arch/x86/kvm/vmx/vmenter.S | 89 +++++-------------- 4 files changed, 185 insertions(+), 72 deletions(-) diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h index 1af2cb59233b..7c12a1fd1ffc 100644 --- a/arch/x86/include/asm/kvm_vcpu_regs.h +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -22,4 +22,126 @@ #define __VCPU_REGS_R15 15 #endif +#ifdef __ASSEMBLER__ + +#define REG_NUM_INVALID 100 + + .macro R32_NUM opd r32 + \opd = REG_NUM_INVALID + .ifc \r32,%eax + \opd = __VCPU_REGS_EAX + .endif + .ifc \r32,%ecx + \opd = __VCPU_REGS_ECX + .endif + .ifc \r32,%edx + \opd = __VCPU_REGS_EDX + .endif + .ifc \r32,%ebx + \opd = __VCPU_REGS_EBX + .endif + .ifc \r32,%esp + \opd = __VCPU_REGS_ESP + .endif + .ifc \r32,%ebp + \opd = __VCPU_REGS_EBP + .endif + .ifc \r32,%esi + \opd = __VCPU_REGS_ESI + .endif + .ifc \r32,%edi + \opd = __VCPU_REGS_EDI + .endif +#ifdef CONFIG_X86_64 + .ifc \r32,%r8d + \opd = 8 + .endif + .ifc \r32,%r9d + \opd = 9 + .endif + .ifc \r32,%r10d + \opd = 10 + .endif + .ifc \r32,%r11d + \opd = 11 + .endif + .ifc \r32,%r12d + \opd = 12 + .endif + .ifc \r32,%r13d + \opd = 13 + .endif + .ifc \r32,%r14d + \opd = 14 + .endif + .ifc \r32,%r15d + \opd = 15 + .endif +#endif + .endm + + .macro R64_NUM opd r64 + \opd = REG_NUM_INVALID +#ifdef CONFIG_X86_64 + .ifc \r64,%rax + \opd = __VCPU_REGS_RAX + .endif + .ifc \r64,%rcx + \opd = __VCPU_REGS_RCX + .endif + .ifc \r64,%rdx + \opd = __VCPU_REGS_RDX + .endif + .ifc \r64,%rbx + \opd = __VCPU_REGS_RBX + .endif + .ifc \r64,%rsp + \opd = __VCPU_REGS_RSP + .endif + .ifc \r64,%rbp + \opd = __VCPU_REGS_RBP + .endif + .ifc \r64,%rsi + \opd = __VCPU_REGS_RSI + .endif + .ifc \r64,%rdi + \opd = __VCPU_REGS_RDI + .endif + .ifc \r64,%r8 + \opd = 8 + .endif + .ifc \r64,%r9 + \opd = 9 + .endif + .ifc \r64,%r10 + \opd = 10 + .endif + .ifc \r64,%r11 + \opd = 11 + .endif + .ifc \r64,%r12 + \opd = 12 + .endif + .ifc \r64,%r13 + \opd = 13 + .endif + .ifc \r64,%r14 + \opd = 14 + .endif + .ifc \r64,%r15 + \opd = 15 + .endif +#endif + .endm + +.macro REG_NUM reg_num reg +#ifdef CONFIG_X86_64 + R64_NUM \reg_num \reg +#else + R32_NUM \reg_num \reg +#endif +.endm + +#endif + #endif /* _ASM_X86_KVM_VCPU_REGS_H */ diff --git 
a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index f523d9e49839..6a91e1383e8f 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -4,13 +4,10 @@ #include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/frame.h> -#include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include "kvm-asm-offsets.h" #include "vmenter.h" -#define WORD_SIZE (BITS_PER_LONG / 8) - /* Intentionally omit RAX as it's context switched by hardware */ #define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) #define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) diff --git a/arch/x86/kvm/vmenter.h b/arch/x86/kvm/vmenter.h index ba3f71449c62..44574806cb02 100644 --- a/arch/x86/kvm/vmenter.h +++ b/arch/x86/kvm/vmenter.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_VMENTER_H #define __KVM_X86_VMENTER_H +#include <asm/kvm_vcpu_regs.h> + #define KVM_ENTER_VMRESUME BIT(0) #define KVM_ENTER_SAVE_SPEC_CTRL BIT(1) #define KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(2) @@ -76,5 +78,46 @@ wrmsr .endm +#define WORD_SIZE (BITS_PER_LONG / 8) + +.macro LOAD_REGS src:req, regs_ofs:req, regs:vararg +.irp reg, \regs + REG_NUM reg_num \reg + .if reg_num <> REG_NUM_INVALID + mov (\regs_ofs + reg_num * WORD_SIZE)(\src), \reg + .else + .err invalid register \reg + .endif +.endr +.endm + +.macro STORE_REGS dst:req, regs_ofs:req, regs:vararg +.irp reg, \regs + REG_NUM reg_num \reg + .if reg_num <> REG_NUM_INVALID + mov \reg, (\regs_ofs + reg_num * WORD_SIZE)(\dst) + .else + .err invalid register \reg + .endif +.endr +.endm + +.macro POP_REGS dst:req, regs_ofs:req, regs:vararg +.irp reg, \regs + REG_NUM reg_num \reg + .if reg_num <> REG_NUM_INVALID + pop (\regs_ofs + reg_num * WORD_SIZE)(\dst) + .else + .err invalid register \reg + .endif +.endr +.endm + +.macro CLEAR_REGS regs:vararg +.irp reg, \regs + xorl \reg, \reg +.endr +.endm + #endif /* __ASSEMBLER__ */ #endif /* __KVM_X86_VMENTER_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 7e4dc17fc0b8..4b7aaa7430fb 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -2,35 +2,12 @@ #include <linux/linkage.h> #include <asm/asm.h> #include <asm/bitsperlong.h> -#include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include <asm/percpu.h> #include <asm/segment.h> #include "kvm-asm-offsets.h" #include "vmenter.h" -#define WORD_SIZE (BITS_PER_LONG / 8) - -#define VCPU_RAX (VMX_vcpu_arch_regs + __VCPU_REGS_RAX * WORD_SIZE) -#define VCPU_RCX (VMX_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) -#define VCPU_RDX (VMX_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) -#define VCPU_RBX (VMX_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) -/* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP (VMX_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) -#define VCPU_RSI (VMX_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) -#define VCPU_RDI (VMX_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) - -#ifdef CONFIG_X86_64 -#define VCPU_R8 (VMX_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) -#define VCPU_R9 (VMX_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) -#define VCPU_R10 (VMX_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) -#define VCPU_R11 (VMX_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) -#define VCPU_R12 (VMX_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) -#define VCPU_R13 (VMX_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) -#define VCPU_R14 (VMX_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) -#define VCPU_R15 (VMX_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) -#endif - .macro 
VMX_DO_EVENT_IRQOFF call_insn call_target /* * Unconditionally create a stack frame, getting the correct RSP on the @@ -114,25 +91,18 @@ SYM_FUNC_START(__vmx_vcpu_run) * an LFENCE to stop speculation from skipping the wrmsr. */ - /* Load guest registers. Don't clobber flags. */ - mov VCPU_RAX(%_ASM_DI), %_ASM_AX - mov VCPU_RCX(%_ASM_DI), %_ASM_CX - mov VCPU_RDX(%_ASM_DI), %_ASM_DX - mov VCPU_RBX(%_ASM_DI), %_ASM_BX - mov VCPU_RBP(%_ASM_DI), %_ASM_BP - mov VCPU_RSI(%_ASM_DI), %_ASM_SI + /* + * Load guest registers. Don't clobber flags. Intentionally omit + * %_ASM_SP as it's context switched by hardware + */ + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ + %_ASM_AX, %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_DI), %r8 - mov VCPU_R9 (%_ASM_DI), %r9 - mov VCPU_R10(%_ASM_DI), %r10 - mov VCPU_R11(%_ASM_DI), %r11 - mov VCPU_R12(%_ASM_DI), %r12 - mov VCPU_R13(%_ASM_DI), %r13 - mov VCPU_R14(%_ASM_DI), %r14 - mov VCPU_R15(%_ASM_DI), %r15 + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 #endif /* Load guest RDI. This kills the @vmx pointer! */ - mov VCPU_RDI(%_ASM_DI), %_ASM_DI + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, %_ASM_DI /* * Note, ALTERNATIVE_2 works in reverse order. If CLEAR_CPU_BUF_VM is @@ -187,23 +157,16 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) /* Reload @vmx to RDI. */ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI - /* Save all guest registers, including RDI from the stack */ - mov %_ASM_AX, VCPU_RAX(%_ASM_DI) - mov %_ASM_CX, VCPU_RCX(%_ASM_DI) - mov %_ASM_DX, VCPU_RDX(%_ASM_DI) - mov %_ASM_BX, VCPU_RBX(%_ASM_DI) - mov %_ASM_BP, VCPU_RBP(%_ASM_DI) - mov %_ASM_SI, VCPU_RSI(%_ASM_DI) - pop VCPU_RDI(%_ASM_DI) + /* + * Save all guest registers, including RDI from the stack. Intentionally + * omit %_ASM_SP as it's context switched by hardware + */ + STORE_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ + %_ASM_AX, %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI + POP_REGS %_ASM_DI, VMX_vcpu_arch_regs, %_ASM_DI #ifdef CONFIG_X86_64 - mov %r8, VCPU_R8 (%_ASM_DI) - mov %r9, VCPU_R9 (%_ASM_DI) - mov %r10, VCPU_R10(%_ASM_DI) - mov %r11, VCPU_R11(%_ASM_DI) - mov %r12, VCPU_R12(%_ASM_DI) - mov %r13, VCPU_R13(%_ASM_DI) - mov %r14, VCPU_R14(%_ASM_DI) - mov %r15, VCPU_R15(%_ASM_DI) + STORE_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 #endif /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ @@ -220,21 +183,9 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return * value. */ - xor %eax, %eax - xor %ecx, %ecx - xor %edx, %edx - xor %ebp, %ebp - xor %esi, %esi - xor %edi, %edi + CLEAR_REGS %eax, %ecx, %edx, %ebp, %esi, %edi #ifdef CONFIG_X86_64 - xor %r8d, %r8d - xor %r9d, %r9d - xor %r10d, %r10d - xor %r11d, %r11d - xor %r12d, %r12d - xor %r13d, %r13d - xor %r14d, %r14d - xor %r15d, %r15d + CLEAR_REGS %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d #endif /* -- 2.52.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() 2026-05-13 17:49 ` [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() Paolo Bonzini @ 2026-05-14 19:30 ` Sean Christopherson 0 siblings, 0 replies; 5+ messages in thread From: Sean Christopherson @ 2026-05-14 19:30 UTC (permalink / raw) To: Paolo Bonzini; +Cc: linux-kernel, kvm, chang.seok.bae, x86 On Wed, May 13, 2026, Paolo Bonzini wrote: > From: "Chang S. Bae" <chang.seok.bae@intel.com> > > Convert the repeated register save/restore sequences into macros to > simplify the VM entry code. This is not simpler. Maybe it ends up being less verbose, but I don't see how anyone can claim this is simpler. E.g. for people like me that aren't already familiar with the .ifc directive, the R32_NUM macro is inscrutable. I can at least suss out what e.g. LOAD_REGS and friends are doing, but claiming the code is "simpler" is rather ridiculous. > This can also make it easier to extend for additional registers. For me, this needs to be the focal point of the changelog, and it needs to be written with --verbose, because the impact of APX on the VM-Entry/VM-Exit code isn't so obvious that a one-line "this makes future life easier" sufficiently justifies the macro magic. And that's coming from someone that generally loves macro magic :-) > diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h > index 1af2cb59233b..7c12a1fd1ffc 100644 > --- a/arch/x86/include/asm/kvm_vcpu_regs.h > +++ b/arch/x86/include/asm/kvm_vcpu_regs.h > @@ -22,4 +22,126 @@ > #define __VCPU_REGS_R15 15 > #endif > > +#ifdef __ASSEMBLER__ > + > +#define REG_NUM_INVALID 100 > + > + .macro R32_NUM opd r32 > + \opd = REG_NUM_INVALID > + .ifc \r32,%eax > + \opd = __VCPU_REGS_EAX > + .endif > + .ifc \r32,%ecx > + \opd = __VCPU_REGS_ECX > + .endif > + .ifc \r32,%edx > + \opd = __VCPU_REGS_EDX > + .endif > + .ifc \r32,%ebx > + \opd = __VCPU_REGS_EBX > + .endif > + .ifc \r32,%esp > + \opd = __VCPU_REGS_ESP > + .endif > + .ifc \r32,%ebp > + \opd = __VCPU_REGS_EBP > + .endif > + .ifc \r32,%esi > + \opd = __VCPU_REGS_ESI > + .endif > + .ifc \r32,%edi > + \opd = __VCPU_REGS_EDI > + .endif > +#ifdef CONFIG_X86_64 > + .ifc \r32,%r8d > + \opd = 8 > + .endif > + .ifc \r32,%r9d > + \opd = 9 > + .endif > + .ifc \r32,%r10d > + \opd = 10 > + .endif > + .ifc \r32,%r11d > + \opd = 11 > + .endif > + .ifc \r32,%r12d > + \opd = 12 > + .endif > + .ifc \r32,%r13d > + \opd = 13 > + .endif > + .ifc \r32,%r14d > + \opd = 14 > + .endif > + .ifc \r32,%r15d > + \opd = 15 > + .endif > +#endif > + .endm > + > + .macro R64_NUM opd r64 > + \opd = REG_NUM_INVALID > +#ifdef CONFIG_X86_64 > + .ifc \r64,%rax > + \opd = __VCPU_REGS_RAX > + .endif > + .ifc \r64,%rcx > + \opd = __VCPU_REGS_RCX > + .endif > + .ifc \r64,%rdx > + \opd = __VCPU_REGS_RDX > + .endif > + .ifc \r64,%rbx > + \opd = __VCPU_REGS_RBX > + .endif > + .ifc \r64,%rsp > + \opd = __VCPU_REGS_RSP > + .endif > + .ifc \r64,%rbp > + \opd = __VCPU_REGS_RBP > + .endif > + .ifc \r64,%rsi > + \opd = __VCPU_REGS_RSI > + .endif > + .ifc \r64,%rdi > + \opd = __VCPU_REGS_RDI > + .endif > + .ifc \r64,%r8 > + \opd = 8 > + .endif > + .ifc \r64,%r9 > + \opd = 9 > + .endif > + .ifc \r64,%r10 > + \opd = 10 > + .endif > + .ifc \r64,%r11 > + \opd = 11 > + .endif > + .ifc \r64,%r12 > + \opd = 12 > + .endif > + .ifc \r64,%r13 > + \opd = 13 > + .endif > + .ifc \r64,%r14 > + \opd = 14 > + .endif > + .ifc \r64,%r15 > + \opd = 15 > + .endif > +#endif > + .endm > + > +.macro REG_NUM reg_num reg > +#ifdef CONFIG_X86_64 > + R64_NUM 
\reg_num \reg > +#else > + R32_NUM \reg_num \reg > +#endif > +.endm > + > +#endif > + > #endif /* _ASM_X86_KVM_VCPU_REGS_H */ > diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S > index f523d9e49839..6a91e1383e8f 100644 > --- a/arch/x86/kvm/svm/vmenter.S > +++ b/arch/x86/kvm/svm/vmenter.S > @@ -4,13 +4,10 @@ > #include <asm/asm-offsets.h> > #include <asm/bitsperlong.h> > #include <asm/frame.h> > -#include <asm/kvm_vcpu_regs.h> > #include <asm/nospec-branch.h> > #include "kvm-asm-offsets.h" > #include "vmenter.h" > > -#define WORD_SIZE (BITS_PER_LONG / 8) > - > /* Intentionally omit RAX as it's context switched by hardware */ > #define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) > #define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) > diff --git a/arch/x86/kvm/vmenter.h b/arch/x86/kvm/vmenter.h > index ba3f71449c62..44574806cb02 100644 > --- a/arch/x86/kvm/vmenter.h > +++ b/arch/x86/kvm/vmenter.h > @@ -2,6 +2,8 @@ > #ifndef __KVM_X86_VMENTER_H > #define __KVM_X86_VMENTER_H > > +#include <asm/kvm_vcpu_regs.h> > + > #define KVM_ENTER_VMRESUME BIT(0) > #define KVM_ENTER_SAVE_SPEC_CTRL BIT(1) > #define KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(2) > @@ -76,5 +78,46 @@ > wrmsr > .endm > > +#define WORD_SIZE (BITS_PER_LONG / 8) > + > +.macro LOAD_REGS src:req, regs_ofs:req, regs:vararg > +.irp reg, \regs > + REG_NUM reg_num \reg > + .if reg_num <> REG_NUM_INVALID > + mov (\regs_ofs + reg_num * WORD_SIZE)(\src), \reg > + .else > + .err invalid register \reg > + .endif > +.endr > +.endm > + > +.macro STORE_REGS dst:req, regs_ofs:req, regs:vararg > +.irp reg, \regs > + REG_NUM reg_num \reg > + .if reg_num <> REG_NUM_INVALID > + mov \reg, (\regs_ofs + reg_num * WORD_SIZE)(\dst) > + .else > + .err invalid register \reg > + .endif > +.endr > +.endm > + > +.macro POP_REGS dst:req, regs_ofs:req, regs:vararg > +.irp reg, \regs > + REG_NUM reg_num \reg > + .if reg_num <> REG_NUM_INVALID > + pop (\regs_ofs + reg_num * WORD_SIZE)(\dst) > + .else > + .err invalid register \reg > + .endif > +.endr > +.endm > + > +.macro CLEAR_REGS regs:vararg > +.irp reg, \regs > + xorl \reg, \reg > +.endr > +.endm > + > #endif /* __ASSEMBLER__ */ > #endif /* __KVM_X86_VMENTER_H */ > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S > index 7e4dc17fc0b8..4b7aaa7430fb 100644 > --- a/arch/x86/kvm/vmx/vmenter.S > +++ b/arch/x86/kvm/vmx/vmenter.S > @@ -2,35 +2,12 @@ > #include <linux/linkage.h> > #include <asm/asm.h> > #include <asm/bitsperlong.h> > -#include <asm/kvm_vcpu_regs.h> > #include <asm/nospec-branch.h> > #include <asm/percpu.h> > #include <asm/segment.h> > #include "kvm-asm-offsets.h" > #include "vmenter.h" > > -#define WORD_SIZE (BITS_PER_LONG / 8) > - > -#define VCPU_RAX (VMX_vcpu_arch_regs + __VCPU_REGS_RAX * WORD_SIZE) > -#define VCPU_RCX (VMX_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) > -#define VCPU_RDX (VMX_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) > -#define VCPU_RBX (VMX_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) > -/* Intentionally omit RSP as it's context switched by hardware */ > -#define VCPU_RBP (VMX_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) > -#define VCPU_RSI (VMX_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) > -#define VCPU_RDI (VMX_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) > - > -#ifdef CONFIG_X86_64 > -#define VCPU_R8 (VMX_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) > -#define VCPU_R9 (VMX_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) > -#define VCPU_R10 (VMX_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) > -#define 
VCPU_R11 (VMX_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) > -#define VCPU_R12 (VMX_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) > -#define VCPU_R13 (VMX_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) > -#define VCPU_R14 (VMX_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) > -#define VCPU_R15 (VMX_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) > -#endif > - > .macro VMX_DO_EVENT_IRQOFF call_insn call_target > /* > * Unconditionally create a stack frame, getting the correct RSP on the > @@ -114,25 +91,18 @@ SYM_FUNC_START(__vmx_vcpu_run) > * an LFENCE to stop speculation from skipping the wrmsr. > */ > > - /* Load guest registers. Don't clobber flags. */ > - mov VCPU_RAX(%_ASM_DI), %_ASM_AX > - mov VCPU_RCX(%_ASM_DI), %_ASM_CX > - mov VCPU_RDX(%_ASM_DI), %_ASM_DX > - mov VCPU_RBX(%_ASM_DI), %_ASM_BX > - mov VCPU_RBP(%_ASM_DI), %_ASM_BP > - mov VCPU_RSI(%_ASM_DI), %_ASM_SI > + /* > + * Load guest registers. Don't clobber flags. Intentionally omit > + * %_ASM_SP as it's context switched by hardware > + */ > + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ > + %_ASM_AX, %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI > #ifdef CONFIG_X86_64 > - mov VCPU_R8 (%_ASM_DI), %r8 > - mov VCPU_R9 (%_ASM_DI), %r9 > - mov VCPU_R10(%_ASM_DI), %r10 > - mov VCPU_R11(%_ASM_DI), %r11 > - mov VCPU_R12(%_ASM_DI), %r12 > - mov VCPU_R13(%_ASM_DI), %r13 > - mov VCPU_R14(%_ASM_DI), %r14 > - mov VCPU_R15(%_ASM_DI), %r15 > + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ > + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 > #endif > /* Load guest RDI. This kills the @vmx pointer! */ > - mov VCPU_RDI(%_ASM_DI), %_ASM_DI > + LOAD_REGS %_ASM_DI, VMX_vcpu_arch_regs, %_ASM_DI > > /* > * Note, ALTERNATIVE_2 works in reverse order. If CLEAR_CPU_BUF_VM is > @@ -187,23 +157,16 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) > /* Reload @vmx to RDI. */ > mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI > > - /* Save all guest registers, including RDI from the stack */ > - mov %_ASM_AX, VCPU_RAX(%_ASM_DI) > - mov %_ASM_CX, VCPU_RCX(%_ASM_DI) > - mov %_ASM_DX, VCPU_RDX(%_ASM_DI) > - mov %_ASM_BX, VCPU_RBX(%_ASM_DI) > - mov %_ASM_BP, VCPU_RBP(%_ASM_DI) > - mov %_ASM_SI, VCPU_RSI(%_ASM_DI) > - pop VCPU_RDI(%_ASM_DI) > + /* > + * Save all guest registers, including RDI from the stack. Intentionally > + * omit %_ASM_SP as it's context switched by hardware > + */ > + STORE_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ > + %_ASM_AX, %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI > + POP_REGS %_ASM_DI, VMX_vcpu_arch_regs, %_ASM_DI > #ifdef CONFIG_X86_64 > - mov %r8, VCPU_R8 (%_ASM_DI) > - mov %r9, VCPU_R9 (%_ASM_DI) > - mov %r10, VCPU_R10(%_ASM_DI) > - mov %r11, VCPU_R11(%_ASM_DI) > - mov %r12, VCPU_R12(%_ASM_DI) > - mov %r13, VCPU_R13(%_ASM_DI) > - mov %r14, VCPU_R14(%_ASM_DI) > - mov %r15, VCPU_R15(%_ASM_DI) > + STORE_REGS %_ASM_DI, VMX_vcpu_arch_regs, \ > + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 > #endif > > /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ > @@ -220,21 +183,9 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) > * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return > * value. 
> */ > - xor %eax, %eax > - xor %ecx, %ecx > - xor %edx, %edx > - xor %ebp, %ebp > - xor %esi, %esi > - xor %edi, %edi > + CLEAR_REGS %eax, %ecx, %edx, %ebp, %esi, %edi > #ifdef CONFIG_X86_64 > - xor %r8d, %r8d > - xor %r9d, %r9d > - xor %r10d, %r10d > - xor %r11d, %r11d > - xor %r12d, %r12d > - xor %r13d, %r13d > - xor %r14d, %r14d > - xor %r15d, %r15d > + CLEAR_REGS %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d > #endif > > /* > -- > 2.52.0 > > ^ permalink raw reply [flat|nested] 5+ messages in thread
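
A side note on .ifc, since it may be unfamiliar: it is a GNU as
conditional that assembles its body only when its two string operands
are identical, which is how REG_NUM resolves a register name to its
__VCPU_REGS_* index at assembly time. A minimal standalone sketch,
where the REG_IDX and idx names are made up for illustration and are
independent of the KVM headers:

	.macro REG_IDX out reg
	\out = 100			# invalid-register marker
	.ifc \reg,%rax
	\out = 0			# matches __VCPU_REGS_RAX
	.endif
	.ifc \reg,%rcx
	\out = 1			# matches __VCPU_REGS_RCX
	.endif
	.endm

	REG_IDX idx %rcx		# after expansion, idx == 1
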
* [PATCH 2/3] KVM: SVM: Macrofy GPR swapping in __svm_vcpu_run() 2026-05-13 17:49 [PATCH 0/3] KVM: Macrofy GPR swapping in assembly code Paolo Bonzini 2026-05-13 17:49 ` [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() Paolo Bonzini @ 2026-05-13 17:49 ` Paolo Bonzini 2026-05-13 17:49 ` [PATCH 3/3] KVM: SEV: Macrofy GPR swapping in __svm_sev_es_vcpu_run() Paolo Bonzini 2 siblings, 0 replies; 5+ messages in thread From: Paolo Bonzini @ 2026-05-13 17:49 UTC (permalink / raw) To: linux-kernel, kvm; +Cc: chang.seok.bae From: "Chang S. Bae" <chang.seok.bae@intel.com> Convert the register save/restore sequences in the SVM entry into macros, following the VMX code. Drop the now-redundant register offset defines. No functional change intended. Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com> Message-ID: <20260512011502.53072-3-chang.seok.bae@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/svm/vmenter.S | 83 +++++++++----------------------------- 1 file changed, 19 insertions(+), 64 deletions(-) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 6a91e1383e8f..06a8cb58f187 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -8,26 +8,6 @@ #include "kvm-asm-offsets.h" #include "vmenter.h" -/* Intentionally omit RAX as it's context switched by hardware */ -#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) -#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) -#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) -/* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) -#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) -#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) - -#ifdef CONFIG_X86_64 -#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) -#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) -#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) -#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) -#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) -#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) -#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) -#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) -#endif - #define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa) .section .noinstr.text, "ax" @@ -108,23 +88,17 @@ SYM_FUNC_START(__svm_vcpu_run) mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX - /* Load guest registers. */ - mov VCPU_RCX(%_ASM_DI), %_ASM_CX - mov VCPU_RDX(%_ASM_DI), %_ASM_DX - mov VCPU_RBX(%_ASM_DI), %_ASM_BX - mov VCPU_RBP(%_ASM_DI), %_ASM_BP - mov VCPU_RSI(%_ASM_DI), %_ASM_SI + /* + * Load guest registers. 
Intentionally omit %_ASM_AX and %_ASM_SP as + * context switched by hardware + */ + LOAD_REGS %_ASM_DI, SVM_vcpu_arch_regs, \ + %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_DI), %r8 - mov VCPU_R9 (%_ASM_DI), %r9 - mov VCPU_R10(%_ASM_DI), %r10 - mov VCPU_R11(%_ASM_DI), %r11 - mov VCPU_R12(%_ASM_DI), %r12 - mov VCPU_R13(%_ASM_DI), %r13 - mov VCPU_R14(%_ASM_DI), %r14 - mov VCPU_R15(%_ASM_DI), %r15 + LOAD_REGS %_ASM_DI, SVM_vcpu_arch_regs, \ + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 #endif - mov VCPU_RDI(%_ASM_DI), %_ASM_DI + LOAD_REGS %_ASM_DI, SVM_vcpu_arch_regs, %_ASM_DI /* Clobbers EFLAGS.ZF */ SVM_CLEAR_CPU_BUFFERS @@ -135,22 +109,15 @@ SYM_FUNC_START(__svm_vcpu_run) /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX - /* Save all guest registers. */ - mov %_ASM_CX, VCPU_RCX(%_ASM_AX) - mov %_ASM_DX, VCPU_RDX(%_ASM_AX) - mov %_ASM_BX, VCPU_RBX(%_ASM_AX) - mov %_ASM_BP, VCPU_RBP(%_ASM_AX) - mov %_ASM_SI, VCPU_RSI(%_ASM_AX) - mov %_ASM_DI, VCPU_RDI(%_ASM_AX) + /* + * Save all guest registers. Intentionally omit %_ASM_AX and %_ASM_SP as + * context switched by hardware + */ + STORE_REGS %_ASM_AX, SVM_vcpu_arch_regs, \ + %_ASM_CX, %_ASM_DX, %_ASM_BX, %_ASM_BP, %_ASM_SI, %_ASM_DI #ifdef CONFIG_X86_64 - mov %r8, VCPU_R8 (%_ASM_AX) - mov %r9, VCPU_R9 (%_ASM_AX) - mov %r10, VCPU_R10(%_ASM_AX) - mov %r11, VCPU_R11(%_ASM_AX) - mov %r12, VCPU_R12(%_ASM_AX) - mov %r13, VCPU_R13(%_ASM_AX) - mov %r14, VCPU_R14(%_ASM_AX) - mov %r15, VCPU_R15(%_ASM_AX) + STORE_REGS %_ASM_AX, SVM_vcpu_arch_regs, \ + %r8, %r9, %r10, %r11, %r12, %r13, %r14, %r15 #endif /* @svm can stay in RDI from now on. */ @@ -193,21 +160,9 @@ SYM_FUNC_START(__svm_vcpu_run) * free. RSP and RAX are exempt as they are restored by hardware * during VM-Exit. */ - xor %ecx, %ecx - xor %edx, %edx - xor %ebx, %ebx - xor %ebp, %ebp - xor %esi, %esi - xor %edi, %edi + CLEAR_REGS %ecx, %edx, %ebx, %ebp, %esi, %edi #ifdef CONFIG_X86_64 - xor %r8d, %r8d - xor %r9d, %r9d - xor %r10d, %r10d - xor %r11d, %r11d - xor %r12d, %r12d - xor %r13d, %r13d - xor %r14d, %r14d - xor %r15d, %r15d + CLEAR_REGS %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d #endif /* "Pop" @enter_flags. */ -- 2.52.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] KVM: SEV: Macrofy GPR swapping in __svm_sev_es_vcpu_run() 2026-05-13 17:49 [PATCH 0/3] KVM: Macrofy GPR swapping in assembly code Paolo Bonzini 2026-05-13 17:49 ` [PATCH 1/3] KVM: VMX: Macrofy GPR swapping in __vmx_vcpu_run() Paolo Bonzini 2026-05-13 17:49 ` [PATCH 2/3] KVM: SVM: Macrofy GPR swapping in __svm_vcpu_run() Paolo Bonzini @ 2026-05-13 17:49 ` Paolo Bonzini 2 siblings, 0 replies; 5+ messages in thread From: Paolo Bonzini @ 2026-05-13 17:49 UTC (permalink / raw) To: linux-kernel, kvm; +Cc: chang.seok.bae From: "Chang S. Bae" <chang.seok.bae@intel.com> Convert the SEV-ES entry code to use macros for saving guest GPRs, following VMX/SVM paths. Drop now-unused register offsets and __VCPU_REGS_* defines. No functional change intended. Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com> Message-ID: <20260512011502.53072-4-chang.seok.bae@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/include/asm/kvm_host.h | 16 ++++++++-------- arch/x86/include/asm/kvm_vcpu_regs.h | 11 ----------- arch/x86/kvm/svm/vmenter.S | 21 ++------------------- 3 files changed, 10 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8a53ca619570..53527b0550c7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -182,14 +182,14 @@ enum kvm_reg { VCPU_REGS_RSI = __VCPU_REGS_RSI, VCPU_REGS_RDI = __VCPU_REGS_RDI, #ifdef CONFIG_X86_64 - VCPU_REGS_R8 = __VCPU_REGS_R8, - VCPU_REGS_R9 = __VCPU_REGS_R9, - VCPU_REGS_R10 = __VCPU_REGS_R10, - VCPU_REGS_R11 = __VCPU_REGS_R11, - VCPU_REGS_R12 = __VCPU_REGS_R12, - VCPU_REGS_R13 = __VCPU_REGS_R13, - VCPU_REGS_R14 = __VCPU_REGS_R14, - VCPU_REGS_R15 = __VCPU_REGS_R15, + VCPU_REGS_R8 = 8, + VCPU_REGS_R9, + VCPU_REGS_R10, + VCPU_REGS_R11, + VCPU_REGS_R12, + VCPU_REGS_R13, + VCPU_REGS_R14, + VCPU_REGS_R15, #endif NR_VCPU_GENERAL_PURPOSE_REGS, diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h index 7c12a1fd1ffc..22efe651c9fc 100644 --- a/arch/x86/include/asm/kvm_vcpu_regs.h +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -11,17 +11,6 @@ #define __VCPU_REGS_RSI 6 #define __VCPU_REGS_RDI 7 -#ifdef CONFIG_X86_64 -#define __VCPU_REGS_R8 8 -#define __VCPU_REGS_R9 9 -#define __VCPU_REGS_R10 10 -#define __VCPU_REGS_R11 11 -#define __VCPU_REGS_R12 12 -#define __VCPU_REGS_R13 13 -#define __VCPU_REGS_R14 14 -#define __VCPU_REGS_R15 15 -#endif - #ifdef __ASSEMBLER__ #define REG_NUM_INVALID 100 diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 06a8cb58f187..99701892f8ec 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -211,18 +211,7 @@ SYM_FUNC_END(__svm_vcpu_run) #ifdef CONFIG_KVM_AMD_SEV - -#ifdef CONFIG_X86_64 #define SEV_ES_GPRS_BASE 0x300 -#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE) -#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE) -#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE) -#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE) -#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE) -#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE) -#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE) -#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE) -#endif /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode @@ -237,19 +226,13 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) * Except for RAX and RSP, all GPRs are 
restored on #VMEXIT, but not * saved on VMRUN. */ - mov %rbp, SEV_ES_RBP (%rdx) - mov %r15, SEV_ES_R15 (%rdx) - mov %r14, SEV_ES_R14 (%rdx) - mov %r13, SEV_ES_R13 (%rdx) - mov %r12, SEV_ES_R12 (%rdx) - mov %rbx, SEV_ES_RBX (%rdx) + STORE_REGS %rdx, SEV_ES_GPRS_BASE, %rbp, %r15, %r14, %r13, %r12, %rbx /* * Save volatile registers that hold arguments that are needed after * #VMEXIT (RDI=@svm and RSI=@enter_flags). */ - mov %rdi, SEV_ES_RDI (%rdx) - mov %rsi, SEV_ES_RSI (%rdx) + STORE_REGS %rdx, SEV_ES_GPRS_BASE, %rdi, %rsi /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */ RESTORE_GUEST_SPEC_CTRL -- 2.52.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread