* [PATCH v4 1/4] KVM: x86: refactor req_immediate_exit logic
2023-12-05 10:37 [PATCH v4 0/4] KVM: x86: tracepoint updates Maxim Levitsky
@ 2023-12-05 10:37 ` Maxim Levitsky
2023-12-05 17:23 ` Sean Christopherson
2023-12-05 10:37 ` [PATCH v4 2/4] KVM: x86: add more information to the kvm_entry tracepoint Maxim Levitsky
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Maxim Levitsky @ 2023-12-05 10:37 UTC (permalink / raw)
To: kvm
Cc: Sean Christopherson, Dave Hansen, Ingo Molnar, x86, linux-kernel,
Paolo Bonzini, Borislav Petkov, Thomas Gleixner, H. Peter Anvin,
Maxim Levitsky
- move req_immediate_exit variable from arch specific to common code.
- remove arch specific callback .request_immediate_exit and move the code
down to the arch's vcpu_run's code.
No functional change is intended.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 -
arch/x86/include/asm/kvm_host.h | 5 ++---
arch/x86/kvm/svm/svm.c | 7 ++++---
arch/x86/kvm/vmx/vmx.c | 18 ++++++-----------
arch/x86/kvm/vmx/vmx.h | 2 --
arch/x86/kvm/x86.c | 31 +++++++++++++-----------------
6 files changed, 25 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 26b628d84594b93..3aeb7c669a0b09b 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -101,7 +101,6 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d7036982332e33d..044b4f9265c5427 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1021,6 +1021,8 @@ struct kvm_vcpu_arch {
*/
bool pdptrs_from_userspace;
+ bool req_immediate_exit;
+
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
#endif
@@ -1700,8 +1702,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
/*
@@ -2187,7 +2187,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1855a6d7c976ad2..d2c6ff9036009dd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4137,9 +4137,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* is enough to force an immediate vmexit.
*/
disable_nmi_singlestep(svm);
- smp_send_reschedule(vcpu->cpu);
+ vcpu->arch.req_immediate_exit = true;
}
+ if (vcpu->arch.req_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
+
pre_svm_run(vcpu);
sync_lapic_to_cr8(vcpu);
@@ -4995,8 +4998,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_intercept = svm_check_intercept,
.handle_exit_irqoff = svm_handle_exit_irqoff,
- .request_immediate_exit = __kvm_request_immediate_exit,
-
.sched_in = svm_sched_in,
.nested_ops = &svm_nested_ops,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index be20a60047b1f29..b8fa16f9e621878 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -67,6 +67,8 @@
#include "x86.h"
#include "smm.h"
+#include <trace/events/ipi.h>
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -1288,8 +1290,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
- vmx->req_immediate_exit = false;
-
/*
* Note that guest MSRs to be saved/restored can also be changed
* when guest state is loaded. This happens when guest transitions
@@ -5996,7 +5996,7 @@ static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!vmx->req_immediate_exit &&
+ if (!vcpu->arch.req_immediate_exit &&
!unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
kvm_lapic_expired_hv_timer(vcpu);
return EXIT_FASTPATH_REENTER_GUEST;
@@ -7154,7 +7154,7 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u64 tscl;
u32 delta_tsc;
- if (vmx->req_immediate_exit) {
+ if (vcpu->arch.req_immediate_exit) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
@@ -7357,6 +7357,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
+ else if (vcpu->arch.req_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
kvm_wait_lapic_expire(vcpu);
@@ -7899,11 +7901,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- to_vmx(vcpu)->req_immediate_exit = true;
-}
-
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
{
@@ -8312,8 +8309,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.check_intercept = vmx_check_intercept,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
- .request_immediate_exit = vmx_request_immediate_exit,
-
.sched_in = vmx_sched_in,
.cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8571,7 +8566,6 @@ static __init int hardware_setup(void)
if (!enable_preemption_timer) {
vmx_x86_ops.set_hv_timer = NULL;
vmx_x86_ops.cancel_hv_timer = NULL;
- vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_caps.supported_mce_cap |= MCG_LMCE_P;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index c2130d2c8e24bb5..4dabd16a3d7180e 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -330,8 +330,6 @@ struct vcpu_vmx {
unsigned int ple_window;
bool ple_window_dirty;
- bool req_immediate_exit;
-
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c924075f6f112a..2089a0b08ce08c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10179,8 +10179,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
* ordering between that side effect, the instruction completing, _and_ the
* delivery of the asynchronous event.
*/
-static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
- bool *req_immediate_exit)
+static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu)
{
bool can_inject;
int r;
@@ -10357,8 +10356,9 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->has_events &&
- kvm_x86_ops.nested_ops->has_events(vcpu))
- *req_immediate_exit = true;
+ kvm_x86_ops.nested_ops->has_events(vcpu)) {
+ vcpu->arch.req_immediate_exit = true;
+ }
/*
* KVM must never queue a new exception while injecting an event; KVM
@@ -10375,10 +10375,9 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
WARN_ON_ONCE(vcpu->arch.exception.pending ||
vcpu->arch.exception_vmexit.pending);
return 0;
-
out:
if (r == -EBUSY) {
- *req_immediate_exit = true;
+ vcpu->arch.req_immediate_exit = true;
r = 0;
}
return r;
@@ -10605,12 +10604,6 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
}
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- smp_send_reschedule(vcpu->cpu);
-}
-EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
-
/*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10625,7 +10618,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_cpu_accept_dm_intr(vcpu);
fastpath_t exit_fastpath;
- bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
@@ -10787,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- r = kvm_check_and_inject_events(vcpu, &req_immediate_exit);
+ r = kvm_check_and_inject_events(vcpu);
if (r < 0) {
r = 0;
goto out;
@@ -10856,10 +10848,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- if (req_immediate_exit) {
+
+ if (vcpu->arch.req_immediate_exit)
kvm_make_request(KVM_REQ_EVENT, vcpu);
- static_call(kvm_x86_request_immediate_exit)(vcpu);
- }
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -10891,6 +10882,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
@@ -10906,6 +10898,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
++vcpu->stat.exits;
}
+ vcpu->arch.req_immediate_exit = false;
/*
* Do this here before restoring debug registers on the host. And
* since we do this before handling the vmexit, a DR access vmexit
@@ -10993,8 +10986,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r;
cancel_injection:
- if (req_immediate_exit)
+ if (vcpu->arch.req_immediate_exit) {
+ vcpu->arch.req_immediate_exit = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
static_call(kvm_x86_cancel_injection)(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
--
2.26.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH v4 1/4] KVM: x86: refactor req_immediate_exit logic
2023-12-05 10:37 ` [PATCH v4 1/4] KVM: x86: refactor req_immediate_exit logic Maxim Levitsky
@ 2023-12-05 17:23 ` Sean Christopherson
0 siblings, 0 replies; 6+ messages in thread
From: Sean Christopherson @ 2023-12-05 17:23 UTC (permalink / raw)
To: Maxim Levitsky
Cc: kvm, Dave Hansen, Ingo Molnar, x86, linux-kernel, Paolo Bonzini,
Borislav Petkov, Thomas Gleixner, H. Peter Anvin
On Tue, Dec 05, 2023, Maxim Levitsky wrote:
> - move req_immediate_exit variable from arch specific to common code.
> - remove arch specific callback .request_immediate_exit and move the code
> down to the arch's vcpu_run's code.
>
> No functional change is intended.
Ooh, but there is a very subtle change here that could impact functionality. This
moves the sending of the self-IPI much closer to VM-Enter. In practice that is
extremely unlikely to matter, but in theory it ever so slightly increases the
danger of the IRQ not being pended before VM-Enter. I am not saying we can't do
the move, but it should be done in a fairly isolated patch.
And I am strongly opposed to moving req_immediate_exit into kvm_vcpu_arch. The
flag in vcpu_vmx is bad enough, but at least that one is decently well guarded
in the sense that's its unconditionally cleared in vmx_prepare_switch_to_guest().
I would much rather go in the opposite direction and turn req_immediate_exit into
a local variable that is passed down the stack as needed.
Somewhat of a side topic, I think we should name the variable that's passed down
the stack "force_immediate_exit". I suspect the "request" verbiage came from the
fact that the immediate exit is triggered in order to service a request. But as
far as the the vendor's .vcpu_run() is concerned, an immediate exit isn't being
requested, it's being forced.
The VMX preemption timer code in particular can benefit from related cleanups, as
the fastpath vs. slowpath code is super confusing (the slowpath _looks_ redundant,
but it's subtly necessary to handle exits from L2). With ~5 patches of cleanup,
the VMX code can be made to look like this, i.e. can eliminate the vcpu_vmx flag
and also handle forced exits from L2 in the fastpath.
static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* In the *extremely* unlikely scenario that this is a spurious VM-Exit
* due to the timer expiring while it was "soft" disabled, just eat the
* exit and re-enter the guest.
*/
if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
return EXIT_FASTPATH_REENTER_GUEST;
/*
* If the timer expired because KVM used it to force an immediate exit,
* then mission accomplished.
*/
if (force_immediate_exit)
return EXIT_FASTPATH_EXIT_HANDLED;
/*
* If L2 is active, go down the slow path as emulating the guest timer
* expiration likely requires synthesizing a nested VM-Exit.
*/
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
kvm_lapic_expired_hv_timer(vcpu);
return EXIT_FASTPATH_REENTER_GUEST;
}
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
/*
* This non-fastpath handler is reached if and only if the preemption
* timer was being used to emulate a guest timer while L2 is active.
* All other scenarios are supposed to be handled in the fastpath.
*/
WARN_ON_ONCE(!is_guest_mode(vcpu));
kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
For this series, the minimal change is to plumb the flag into track_kvm_entry()
in a standalone patch. The below is compile tested only.
Holler if you want me to post an RFC for the full series I have in mind, e.g. if
it's hard to see how the below leads to a sane end result. I have the code written
and compile tested, but I'm going to be buried in non-upstream stuff for a few
days and probably won't get around to fully testing and writing changelogs until
later this week.
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/svm/svm.c | 5 +++--
arch/x86/kvm/trace.h | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 4 ++--
arch/x86/kvm/x86.c | 2 +-
5 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c8c7e2475a18..aee423e6c6f0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1652,7 +1652,8 @@ struct kvm_x86_ops {
void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
- enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+ enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c46f07b28230..33f3db2058fd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4112,12 +4112,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_exit_irqoff();
}
-static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
- trace_kvm_entry(vcpu);
+ trace_kvm_entry(vcpu, force_immediate_exit);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 83843379813e..88659de4d2a7 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -15,20 +15,23 @@
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_entry,
- TP_PROTO(struct kvm_vcpu *vcpu),
- TP_ARGS(vcpu),
+ TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit),
+ TP_ARGS(vcpu, force_immediate_exit),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
+ __field( bool, immediate_exit )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
+ __entry->immediate_exit = force_immediate_exit;
),
- TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+ TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+ __entry->immediate_exit ? "[immediate exit]" : "")
);
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d30df9b3fe3e..dc0d986e35c3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7270,7 +7270,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_exit_irqoff();
}
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
@@ -7297,7 +7297,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
- trace_kvm_entry(vcpu);
+ trace_kvm_entry(vcpu, force_immediate_exit);
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1983947b8965..f1d9828abeba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10916,7 +10916,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
- exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+ exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu, req_immediate_exit);
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
base-commit: 1ab097653e4dd8d23272d028a61352c23486fd4a
--
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v4 2/4] KVM: x86: add more information to the kvm_entry tracepoint
2023-12-05 10:37 [PATCH v4 0/4] KVM: x86: tracepoint updates Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 1/4] KVM: x86: refactor req_immediate_exit logic Maxim Levitsky
@ 2023-12-05 10:37 ` Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 3/4] KVM: x86: add information about pending requests to kvm_exit tracepoint Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 4/4] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
3 siblings, 0 replies; 6+ messages in thread
From: Maxim Levitsky @ 2023-12-05 10:37 UTC (permalink / raw)
To: kvm
Cc: Sean Christopherson, Dave Hansen, Ingo Molnar, x86, linux-kernel,
Paolo Bonzini, Borislav Petkov, Thomas Gleixner, H. Peter Anvin,
Maxim Levitsky
Add VMX/SVM specific interrupt injection info to vm entry tracepoint.
Also add a flag showing that immediate vm exit is set to happen after
the entry.
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 5 ++++-
arch/x86/kvm/svm/svm.c | 17 +++++++++++++++++
arch/x86/kvm/trace.h | 15 +++++++++++++--
arch/x86/kvm/vmx/vmx.c | 12 ++++++++++++
5 files changed, 47 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 3aeb7c669a0b09b..68fc0ef91ccbedc 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP(sched_in)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 044b4f9265c5427..97a5fecc16c8542 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1689,13 +1689,16 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
u32 *exit_int_info, u32 *exit_int_info_err_code);
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *inj_info, u32 *inj_info_error_code);
+
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d2c6ff9036009dd..ad27e07da767f99 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3492,6 +3492,22 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
+static void svm_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ *inj_info = control->event_inj;
+
+ if ((*inj_info & SVM_EXITINTINFO_VALID) &&
+ (*inj_info & SVM_EXITINTINFO_VALID_ERR))
+ *inj_info_error_code = control->event_inj_err;
+ else
+ *inj_info_error_code = 0;
+
+}
+
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4983,6 +4999,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
+ .get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 83843379813ee3e..28e8a63368cc021 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -21,14 +21,25 @@ TRACE_EVENT(kvm_entry,
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
- ),
+ __field( u32, inj_info )
+ __field( u32, inj_info_err )
+ __field( bool, req_imm_exit )
+ ),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
+ __entry->req_imm_exit = vcpu->arch.req_immediate_exit;
+
+ static_call(kvm_x86_get_entry_info)(vcpu,
+ &__entry->inj_info,
+ &__entry->inj_info_err);
),
- TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+ TP_printk("vcpu %u, rip 0x%lx inj 0x%08x inj_error_code 0x%08x%s",
+ __entry->vcpu_id, __entry->rip,
+ __entry->inj_info, __entry->inj_info_err,
+ __entry->req_imm_exit ? " [req_imm_exit]" : "")
);
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b8fa16f9e621878..2d78c94e464295d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6156,6 +6156,17 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
+static void vmx_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ *inj_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if (is_exception_with_error_code(*inj_info))
+ *inj_info_error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ else
+ *inj_info_error_code = 0;
+}
+
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@@ -8294,6 +8305,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
+ .get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
--
2.26.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH v4 3/4] KVM: x86: add information about pending requests to kvm_exit tracepoint
2023-12-05 10:37 [PATCH v4 0/4] KVM: x86: tracepoint updates Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 1/4] KVM: x86: refactor req_immediate_exit logic Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 2/4] KVM: x86: add more information to the kvm_entry tracepoint Maxim Levitsky
@ 2023-12-05 10:37 ` Maxim Levitsky
2023-12-05 10:37 ` [PATCH v4 4/4] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
3 siblings, 0 replies; 6+ messages in thread
From: Maxim Levitsky @ 2023-12-05 10:37 UTC (permalink / raw)
To: kvm
Cc: Sean Christopherson, Dave Hansen, Ingo Molnar, x86, linux-kernel,
Paolo Bonzini, Borislav Petkov, Thomas Gleixner, H. Peter Anvin,
Maxim Levitsky
This allows to gather information on how often kvm interrupts vCPUs due
to specific requests.
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/trace.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 28e8a63368cc021..e275a02a21e5233 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -316,12 +316,14 @@ TRACE_EVENT(name, \
__field( u32, intr_info ) \
__field( u32, error_code ) \
__field( unsigned int, vcpu_id ) \
+ __field( u64, requests ) \
), \
\
TP_fast_assign( \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
+ __entry->requests = READ_ONCE(vcpu->requests); \
static_call(kvm_x86_get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
@@ -331,11 +333,13 @@ TRACE_EVENT(name, \
), \
\
TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
- "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
+ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
+ "requests 0x%016llx", \
__entry->vcpu_id, \
kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
__entry->guest_rip, __entry->info1, __entry->info2, \
- __entry->intr_info, __entry->error_code) \
+ __entry->intr_info, __entry->error_code, \
+ __entry->requests) \
)
/*
--
2.26.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH v4 4/4] KVM: x86: add new nested vmexit tracepoints
2023-12-05 10:37 [PATCH v4 0/4] KVM: x86: tracepoint updates Maxim Levitsky
` (2 preceding siblings ...)
2023-12-05 10:37 ` [PATCH v4 3/4] KVM: x86: add information about pending requests to kvm_exit tracepoint Maxim Levitsky
@ 2023-12-05 10:37 ` Maxim Levitsky
3 siblings, 0 replies; 6+ messages in thread
From: Maxim Levitsky @ 2023-12-05 10:37 UTC (permalink / raw)
To: kvm
Cc: Sean Christopherson, Dave Hansen, Ingo Molnar, x86, linux-kernel,
Paolo Bonzini, Borislav Petkov, Thomas Gleixner, H. Peter Anvin,
Maxim Levitsky
Add 3 new tracepoints for nested VM exits which are intended
to capture extra information to gain insights about the nested guest
behavior.
The new tracepoints are:
- kvm_nested_msr
- kvm_nested_hypercall
These tracepoints capture extra register state to be able to know
which MSR or which hypercall was done.
- kvm_nested_page_fault
This tracepoint allows to capture extra info about which host pagefault
error code caused the nested page fault.
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/svm/nested.c | 22 +++++++++++
arch/x86/kvm/trace.h | 82 +++++++++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
arch/x86/kvm/x86.c | 3 ++
4 files changed, 131 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3fea8c47679e689..05fc257818fc112 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
+ u64 host_error_code = vmcb->control.exit_info_1;
+
if (vmcb->control.exit_code != SVM_EXIT_NPF) {
/*
@@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
vmcb->control.exit_code_hi = 0;
vmcb->control.exit_info_1 = (1ULL << 32);
vmcb->control.exit_info_2 = fault->address;
+ host_error_code = 0;
}
vmcb->control.exit_info_1 &= ~0xffffffffULL;
vmcb->control.exit_info_1 |= fault->error_code;
+ trace_kvm_nested_page_fault(fault->address, host_error_code,
+ fault->error_code);
+
nested_svm_vmexit(svm);
}
@@ -1139,6 +1145,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.exit_int_info_err,
KVM_ISA_SVM);
+ /* Collect some info about nested VM exits */
+ switch (vmcb12->control.exit_code) {
+ case SVM_EXIT_MSR:
+ trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
+ kvm_rcx_read(vcpu),
+ (vmcb12->save.rax & 0xFFFFFFFFull) |
+ (((u64)kvm_rdx_read(vcpu) << 32)));
+ break;
+ case SVM_EXIT_VMMCALL:
+ trace_kvm_nested_hypercall(vmcb12->save.rax,
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+ }
+
kvm_vcpu_unmap(vcpu, &map, true);
nested_svm_transition_tlb_flush(vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e275a02a21e5233..782c435bddfd454 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -610,7 +610,7 @@ TRACE_EVENT(kvm_pv_eoi,
);
/*
- * Tracepoint for nested VMRUN
+ * Tracepoint for nested VMRUN/VMENTER
*/
TRACE_EVENT(kvm_nested_vmenter,
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
@@ -743,8 +743,84 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip: 0x%016llx", __entry->rip)
);
+
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for nested guest MSR access.
+ */
+TRACE_EVENT(kvm_nested_msr,
+ TP_PROTO(bool write, u32 ecx, u64 data),
+ TP_ARGS(write, ecx, data),
+
+ TP_STRUCT__entry(
+ __field( bool, write )
+ __field( u32, ecx )
+ __field( u64, data )
+ ),
+
+ TP_fast_assign(
+ __entry->write = write;
+ __entry->ecx = ecx;
+ __entry->data = data;
+ ),
+
+ TP_printk("msr_%s %x = 0x%llx",
+ __entry->write ? "write" : "read",
+ __entry->ecx, __entry->data)
+);
+
+/*
+ * Tracepoint for nested hypercalls, capturing generic info about the
+ * hypercall
+ */
+
+TRACE_EVENT(kvm_nested_hypercall,
+ TP_PROTO(u64 rax, u64 rbx, u64 rcx, u64 rdx),
+ TP_ARGS(rax, rbx, rcx, rdx),
+
+ TP_STRUCT__entry(
+ __field( u64, rax )
+ __field( u64, rbx )
+ __field( u64, rcx )
+ __field( u64, rdx )
+ ),
+
+ TP_fast_assign(
+ __entry->rax = rax;
+ __entry->rbx = rbx;
+ __entry->rcx = rcx;
+ __entry->rdx = rdx;
+ ),
+
+ TP_printk("rax 0x%llx rbx 0x%llx rcx 0x%llx rdx 0x%llx",
+ __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+
+TRACE_EVENT(kvm_nested_page_fault,
+ TP_PROTO(u64 gpa, u64 host_error_code, u64 guest_error_code),
+ TP_ARGS(gpa, host_error_code, guest_error_code),
+
+ TP_STRUCT__entry(
+ __field( u64, gpa )
+ __field( u64, host_error_code )
+ __field( u64, guest_errror_code )
+ ),
+
+ TP_fast_assign(
+ __entry->gpa = gpa;
+ __entry->host_error_code = host_error_code;
+ __entry->guest_errror_code = guest_error_code;
+ ),
+
+ TP_printk("gpa 0x%llx host err 0x%llx guest err 0x%llx",
+ __entry->gpa,
+ __entry->host_error_code,
+ __entry->guest_errror_code)
+);
+
+
+/*
+ * Tracepoint for invlpga
*/
TRACE_EVENT(kvm_invlpga,
TP_PROTO(__u64 rip, int asid, u64 address),
@@ -767,7 +843,7 @@ TRACE_EVENT(kvm_invlpga,
);
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for skinit
*/
TRACE_EVENT(kvm_skinit,
TP_PROTO(__u64 rip, __u32 slb),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c5ec0ef51ff78fa..23325a7af8fc2ab 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -402,6 +402,16 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
*/
nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
fault->address);
+
+ /*
+ * vmx_get_exit_qual() returns the original exit qualification,
+ * before it was overridden with exit qualification that
+ * is about to be injected to the guest.
+ */
+
+ trace_kvm_nested_page_fault(fault->address,
+ vmx_get_exit_qual(vcpu),
+ exit_qualification);
}
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
@@ -4877,6 +4887,23 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmcs12->vm_exit_intr_error_code,
KVM_ISA_VMX);
+ switch ((u16)vmcs12->vm_exit_reason) {
+ case EXIT_REASON_MSR_READ:
+ case EXIT_REASON_MSR_WRITE:
+ trace_kvm_nested_msr(vmcs12->vm_exit_reason == EXIT_REASON_MSR_WRITE,
+ kvm_rcx_read(vcpu),
+ (kvm_rax_read(vcpu) & 0xFFFFFFFFull) |
+ (((u64)kvm_rdx_read(vcpu)) << 32));
+ break;
+ case EXIT_REASON_VMCALL:
+ trace_kvm_nested_hypercall(kvm_rax_read(vcpu),
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+
+ }
+
load_vmcs12_host_state(vcpu, vmcs12);
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2089a0b08ce08c0..c75001abca8e835 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13787,6 +13787,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_hypercall);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
--
2.26.3
^ permalink raw reply related [flat|nested] 6+ messages in thread