* [PATCH v2 0/4] KVM: x86: tracepoint updates
From: Maxim Levitsky @ 2023-09-24 12:44 UTC
To: kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Paolo Bonzini, Ingo Molnar, Thomas Gleixner,
Borislav Petkov, Maxim Levitsky
This patch series adds selected information to the KVM tracepoints
to make it easier to gather insights about running nested guests.

This patch series was developed together with a new x86 performance analysis tool
that I wrote recently (https://gitlab.com/maximlevitsky/kvmon),
which aims to be a better kvm_stat and lets you see at a glance
what is happening in a VM, including nesting.
Best regards,
Maxim Levitsky
Maxim Levitsky (4):
KVM: x86: refactor req_immediate_exit logic
KVM: x86: add more information to the kvm_entry tracepoint
KVM: x86: add more information to kvm_exit tracepoint
KVM: x86: add new nested vmexit tracepoints
arch/x86/include/asm/kvm-x86-ops.h | 2 +-
arch/x86/include/asm/kvm_host.h | 11 ++-
arch/x86/kvm/svm/nested.c | 22 ++++++
arch/x86/kvm/svm/svm.c | 22 +++++-
arch/x86/kvm/trace.h | 115 +++++++++++++++++++++++++++--
arch/x86/kvm/vmx/nested.c | 21 ++++++
arch/x86/kvm/vmx/vmx.c | 31 +++++---
arch/x86/kvm/vmx/vmx.h | 2 -
arch/x86/kvm/x86.c | 34 ++++-----
9 files changed, 214 insertions(+), 46 deletions(-)
--
2.26.3
* [PATCH v2 1/4] KVM: x86: refactor req_immediate_exit logic
From: Maxim Levitsky @ 2023-09-24 12:44 UTC
To: kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Paolo Bonzini, Ingo Molnar, Thomas Gleixner,
Borislav Petkov, Maxim Levitsky
- Move the req_immediate_exit variable from arch-specific code to common code.
- Remove the arch-specific .request_immediate_exit callback and move its code
  into each arch's vcpu_run() implementation.

No functional change is intended.
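For reference, the resulting flow is roughly the following (a sketch
consolidated from the diff below, not the literal code):

	/* Common code, vcpu_enter_guest(): */
	if (vcpu->arch.req_immediate_exit)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	/* Vendor code, e.g. svm_vcpu_run(): a self-IPI forces the immediate exit */
	if (vcpu->arch.req_immediate_exit)
		smp_send_reschedule(vcpu->cpu);

	/* Common code, after the exit: */
	vcpu->arch.req_immediate_exit = false;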
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 -
arch/x86/include/asm/kvm_host.h | 5 ++---
arch/x86/kvm/svm/svm.c | 5 +++--
arch/x86/kvm/vmx/vmx.c | 18 ++++++-----------
arch/x86/kvm/vmx/vmx.h | 2 --
arch/x86/kvm/x86.c | 31 +++++++++++++-----------------
6 files changed, 24 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e3054e3e46d52d..f654a7f4cc8c0c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -101,7 +101,6 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d5d..383a1d0cc0743b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1011,6 +1011,8 @@ struct kvm_vcpu_arch {
*/
bool pdptrs_from_userspace;
+ bool req_immediate_exit;
+
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
#endif
@@ -1690,8 +1692,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
/*
@@ -2176,7 +2176,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9507df93f410a6..60b130b7f9d510 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4176,6 +4176,9 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
kvm_load_guest_xsave_state(vcpu);
+ if (vcpu->arch.req_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
+
kvm_wait_lapic_expire(vcpu);
/*
@@ -5004,8 +5007,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_intercept = svm_check_intercept,
.handle_exit_irqoff = svm_handle_exit_irqoff,
- .request_immediate_exit = __kvm_request_immediate_exit,
-
.sched_in = svm_sched_in,
.nested_ops = &svm_nested_ops,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 72e3943f36935c..eb7e42235e8811 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -67,6 +67,8 @@
#include "x86.h"
#include "smm.h"
+#include <trace/events/ipi.h>
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -1288,8 +1290,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
- vmx->req_immediate_exit = false;
-
/*
* Note that guest MSRs to be saved/restored can also be changed
* when guest state is loaded. This happens when guest transitions
@@ -5996,7 +5996,7 @@ static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!vmx->req_immediate_exit &&
+ if (!vcpu->arch.req_immediate_exit &&
!unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
kvm_lapic_expired_hv_timer(vcpu);
return EXIT_FASTPATH_REENTER_GUEST;
@@ -7154,7 +7154,7 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u64 tscl;
u32 delta_tsc;
- if (vmx->req_immediate_exit) {
+ if (vcpu->arch.req_immediate_exit) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
@@ -7357,6 +7357,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
+ else if (vcpu->arch.req_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
kvm_wait_lapic_expire(vcpu);
@@ -7902,11 +7904,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- to_vmx(vcpu)->req_immediate_exit = true;
-}
-
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
{
@@ -8315,8 +8312,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.check_intercept = vmx_check_intercept,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
- .request_immediate_exit = vmx_request_immediate_exit,
-
.sched_in = vmx_sched_in,
.cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8574,7 +8569,6 @@ static __init int hardware_setup(void)
if (!enable_preemption_timer) {
vmx_x86_ops.set_hv_timer = NULL;
vmx_x86_ops.cancel_hv_timer = NULL;
- vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_caps.supported_mce_cap |= MCG_LMCE_P;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index c2130d2c8e24bb..4dabd16a3d7180 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -330,8 +330,6 @@ struct vcpu_vmx {
unsigned int ple_window;
bool ple_window_dirty;
- bool req_immediate_exit;
-
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda66b..dfb7d25ed94f26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10049,8 +10049,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
* ordering between that side effect, the instruction completing, _and_ the
* delivery of the asynchronous event.
*/
-static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
- bool *req_immediate_exit)
+static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu)
{
bool can_inject;
int r;
@@ -10227,8 +10226,9 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->has_events &&
- kvm_x86_ops.nested_ops->has_events(vcpu))
- *req_immediate_exit = true;
+ kvm_x86_ops.nested_ops->has_events(vcpu)) {
+ vcpu->arch.req_immediate_exit = true;
+ }
/*
* KVM must never queue a new exception while injecting an event; KVM
@@ -10245,10 +10245,9 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
WARN_ON_ONCE(vcpu->arch.exception.pending ||
vcpu->arch.exception_vmexit.pending);
return 0;
-
out:
if (r == -EBUSY) {
- *req_immediate_exit = true;
+ vcpu->arch.req_immediate_exit = true;
r = 0;
}
return r;
@@ -10475,12 +10474,6 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
}
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- smp_send_reschedule(vcpu->cpu);
-}
-EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
-
/*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10495,7 +10488,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_cpu_accept_dm_intr(vcpu);
fastpath_t exit_fastpath;
- bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
@@ -10657,7 +10649,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- r = kvm_check_and_inject_events(vcpu, &req_immediate_exit);
+ r = kvm_check_and_inject_events(vcpu);
if (r < 0) {
r = 0;
goto out;
@@ -10726,10 +10718,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- if (req_immediate_exit) {
+
+ if (vcpu->arch.req_immediate_exit)
kvm_make_request(KVM_REQ_EVENT, vcpu);
- static_call(kvm_x86_request_immediate_exit)(vcpu);
- }
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -10761,6 +10752,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
@@ -10776,6 +10768,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
++vcpu->stat.exits;
}
+ vcpu->arch.req_immediate_exit = false;
/*
* Do this here before restoring debug registers on the host. And
* since we do this before handling the vmexit, a DR access vmexit
@@ -10863,8 +10856,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r;
cancel_injection:
- if (req_immediate_exit)
+ if (vcpu->arch.req_immediate_exit) {
+ vcpu->arch.req_immediate_exit = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
static_call(kvm_x86_cancel_injection)(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
--
2.26.3
* [PATCH v2 2/4] KVM: x86: add more information to the kvm_entry tracepoint
From: Maxim Levitsky @ 2023-09-24 12:44 UTC
To: kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Paolo Bonzini, Ingo Molnar, Thomas Gleixner,
Borislav Petkov, Maxim Levitsky
Add:
- A flag showing that the vCPU is in guest mode on entry.
- A flag showing that an immediate VM exit is set to happen after the entry.
- VMX/SVM-specific interrupt injection info (as on a VM exit).
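With this patch, the kvm_entry tracepoint output looks roughly like this
(the values are illustrative only):

	kvm_entry: vcpu 0, rip 0xffffffff81060b2e inj 0x800000ec inj_error_code 0x00000000 [imm exit] [guest]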
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 5 ++++-
arch/x86/kvm/svm/svm.c | 17 +++++++++++++++++
arch/x86/kvm/trace.h | 19 +++++++++++++++++--
arch/x86/kvm/vmx/vmx.c | 12 ++++++++++++
5 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index f654a7f4cc8c0c..346fed6e3c33aa 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP(sched_in)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 383a1d0cc0743b..321721813474f7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1679,13 +1679,16 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
u32 *exit_int_info, u32 *exit_int_info_err_code);
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *inj_info, u32 *inj_info_error_code);
+
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 60b130b7f9d510..cd65c04be3d0e2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3504,6 +3504,22 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
+static void svm_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ *inj_info = control->event_inj;
+
+ if ((*inj_info & SVM_EXITINTINFO_VALID) &&
+ (*inj_info & SVM_EXITINTINFO_VALID_ERR))
+ *inj_info_error_code = control->event_inj_err;
+ else
+ *inj_info_error_code = 0;
+
+}
+
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4992,6 +5008,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
+ .get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 83843379813ee3..f4c56f59f5c11b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -21,14 +21,29 @@ TRACE_EVENT(kvm_entry,
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
- ),
+ __field( u32, inj_info )
+ __field( u32, inj_info_err )
+ __field( bool, guest_mode )
+ __field( bool, req_imm_exit )
+ ),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
+
+ static_call(kvm_x86_get_entry_info)(vcpu,
+ &__entry->inj_info,
+ &__entry->inj_info_err);
+
+ __entry->req_imm_exit = vcpu->arch.req_immediate_exit;
+ __entry->guest_mode = is_guest_mode(vcpu);
),
- TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+ TP_printk("vcpu %u, rip 0x%lx inj 0x%08x inj_error_code 0x%08x%s%s",
+ __entry->vcpu_id, __entry->rip,
+ __entry->inj_info, __entry->inj_info_err,
+ __entry->req_imm_exit ? " [imm exit]" : "",
+ __entry->guest_mode ? " [guest]" : "")
);
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index eb7e42235e8811..9dd13f52d4999c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6156,6 +6156,17 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
+static void vmx_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ *inj_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if (is_exception_with_error_code(*inj_info))
+ *inj_info_error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ else
+ *inj_info_error_code = 0;
+}
+
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@@ -8297,6 +8308,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
+ .get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
--
2.26.3
* [PATCH v2 3/4] KVM: x86: add more information to kvm_exit tracepoint
From: Maxim Levitsky @ 2023-09-24 12:44 UTC
To: kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Paolo Bonzini, Ingo Molnar, Thomas Gleixner,
Borislav Petkov, Maxim Levitsky
Add:
- A flag showing that the vCPU is in guest mode.
- A bitmap of pending KVM requests.
- A flag showing that this VM exit is due to a request for an
  immediate VM exit.
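With this patch, a kvm_exit trace line looks roughly like this (the
values are illustrative only):

	kvm_exit: vcpu 0 reason MSR_WRITE rip 0xffffffff8106a2c4 info1 0x00000000c0000100 info2 0x0000000000000000 intr_info 0x00000000 error_code 0x00000000 requests 0x0000000000000100 [guest]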
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/trace.h | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index f4c56f59f5c11b..0657a3a348b4ae 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -320,12 +320,18 @@ TRACE_EVENT(name, \
__field( u32, intr_info ) \
__field( u32, error_code ) \
__field( unsigned int, vcpu_id ) \
+ __field( bool, guest_mode ) \
+ __field( u64, requests ) \
+ __field( bool, req_imm_exit ) \
), \
\
TP_fast_assign( \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
+ __entry->guest_mode = is_guest_mode(vcpu); \
+ __entry->requests = READ_ONCE(vcpu->requests); \
+ __entry->req_imm_exit = vcpu->arch.req_immediate_exit; \
static_call(kvm_x86_get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
@@ -335,11 +341,15 @@ TRACE_EVENT(name, \
), \
\
TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
- "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
+ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
+ "requests 0x%016llx%s%s", \
__entry->vcpu_id, \
kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
__entry->guest_rip, __entry->info1, __entry->info2, \
- __entry->intr_info, __entry->error_code) \
+ __entry->intr_info, __entry->error_code, \
+ __entry->requests, \
+ __entry->guest_mode ? " [guest]" : "", \
+ __entry->req_imm_exit ? " [imm exit]" : "") \
)
/*
--
2.26.3
* [PATCH v2 4/4] KVM: x86: add new nested vmexit tracepoints
From: Maxim Levitsky @ 2023-09-24 12:44 UTC
To: kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Paolo Bonzini, Ingo Molnar, Thomas Gleixner,
Borislav Petkov, Maxim Levitsky
Add 3 new tracepoints for nested VM exits, intended to capture extra
information and give insight into nested guest behavior.

The new tracepoints are:

- kvm_nested_msr
- kvm_nested_hypercall

  These capture extra register state, making it possible to tell which
  MSR was accessed or which hypercall was made.

- kvm_nested_page_fault

  This captures the host page fault error code that caused the nested
  page fault.
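For example, the new tracepoints produce output roughly like this (the
values are illustrative only):

	kvm_nested_msr: msr_write c0000100 = 0x00007f2e41000000
	kvm_nested_hypercall: rax 0x9 rbx 0x341000 rcx 0x1 rdx 0x0
	kvm_nested_page_fault: gpa 0x341000 host err 0x182 guest err 0x182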
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm/nested.c | 22 +++++++++
arch/x86/kvm/trace.h | 82 +++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx/nested.c | 21 +++++++++
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 3 ++
6 files changed, 127 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 321721813474f7..64c195fb8789f7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -966,6 +966,7 @@ struct kvm_vcpu_arch {
/* set at EPT violation at this point */
unsigned long exit_qualification;
+ u32 ept_fault_error_code;
/* pv related host specific info */
struct {
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dd496c9e5f91f2..1cd9c3ab60ab3a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
+ u64 host_error_code = vmcb->control.exit_info_1;
+
if (vmcb->control.exit_code != SVM_EXIT_NPF) {
/*
@@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
vmcb->control.exit_code_hi = 0;
vmcb->control.exit_info_1 = (1ULL << 32);
vmcb->control.exit_info_2 = fault->address;
+ host_error_code = 0;
}
vmcb->control.exit_info_1 &= ~0xffffffffULL;
vmcb->control.exit_info_1 |= fault->error_code;
+ trace_kvm_nested_page_fault(fault->address, host_error_code,
+ fault->error_code);
+
nested_svm_vmexit(svm);
}
@@ -1139,6 +1145,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.exit_int_info_err,
KVM_ISA_SVM);
+ /* Collect some info about nested VM exits */
+ switch (vmcb12->control.exit_code) {
+ case SVM_EXIT_MSR:
+ trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
+ kvm_rcx_read(vcpu),
+ (vmcb12->save.rax & -1u) |
+ (((u64)(kvm_rdx_read(vcpu) & -1u) << 32)));
+ break;
+ case SVM_EXIT_VMMCALL:
+ trace_kvm_nested_hypercall(vmcb12->save.rax,
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+ }
+
kvm_vcpu_unmap(vcpu, &map, true);
nested_svm_transition_tlb_flush(vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0657a3a348b4ae..d08ae87c536324 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -620,7 +620,7 @@ TRACE_EVENT(kvm_pv_eoi,
);
/*
- * Tracepoint for nested VMRUN
+ * Tracepoint for nested VMRUN/VMENTER
*/
TRACE_EVENT(kvm_nested_vmenter,
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
@@ -753,8 +753,84 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip: 0x%016llx", __entry->rip)
);
+
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for nested guest MSR access.
+ */
+TRACE_EVENT(kvm_nested_msr,
+ TP_PROTO(bool write, u32 ecx, u64 data),
+ TP_ARGS(write, ecx, data),
+
+ TP_STRUCT__entry(
+ __field( bool, write )
+ __field( u32, ecx )
+ __field( u64, data )
+ ),
+
+ TP_fast_assign(
+ __entry->write = write;
+ __entry->ecx = ecx;
+ __entry->data = data;
+ ),
+
+ TP_printk("msr_%s %x = 0x%llx",
+ __entry->write ? "write" : "read",
+ __entry->ecx, __entry->data)
+);
+
+/*
+ * Tracepoint for nested hypercalls, capturing generic info about the
+ * hypercall
+ */
+
+TRACE_EVENT(kvm_nested_hypercall,
+ TP_PROTO(u64 rax, u64 rbx, u64 rcx, u64 rdx),
+ TP_ARGS(rax, rbx, rcx, rdx),
+
+ TP_STRUCT__entry(
+ __field( u64, rax )
+ __field( u64, rbx )
+ __field( u64, rcx )
+ __field( u64, rdx )
+ ),
+
+ TP_fast_assign(
+ __entry->rax = rax;
+ __entry->rbx = rbx;
+ __entry->rcx = rcx;
+ __entry->rdx = rdx;
+ ),
+
+ TP_printk("rax 0x%llx rbx 0x%llx rcx 0x%llx rdx 0x%llx",
+ __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+
+TRACE_EVENT(kvm_nested_page_fault,
+ TP_PROTO(u64 gpa, u64 host_error_code, u64 guest_error_code),
+ TP_ARGS(gpa, host_error_code, guest_error_code),
+
+ TP_STRUCT__entry(
+ __field( u64, gpa )
+ __field( u64, host_error_code )
+ __field( u64, guest_error_code )
+ ),
+
+ TP_fast_assign(
+ __entry->gpa = gpa;
+ __entry->host_error_code = host_error_code;
+ __entry->guest_error_code = guest_error_code;
+ ),
+
+ TP_printk("gpa 0x%llx host err 0x%llx guest err 0x%llx",
+ __entry->gpa,
+ __entry->host_error_code,
+ __entry->guest_error_code)
+);
+
+
+/*
+ * Tracepoint for invlpga
*/
TRACE_EVENT(kvm_invlpga,
TP_PROTO(__u64 rip, int asid, u64 address),
@@ -777,7 +853,7 @@ TRACE_EVENT(kvm_invlpga,
);
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for skinit
*/
TRACE_EVENT(kvm_skinit,
TP_PROTO(__u64 rip, __u32 slb),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c5ec0ef51ff78f..b3b89d5152cd39 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -402,6 +402,10 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
*/
nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
fault->address);
+
+ trace_kvm_nested_page_fault(fault->address,
+ vcpu->arch.ept_fault_error_code,
+ fault->error_code);
}
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
@@ -4877,6 +4881,23 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmcs12->vm_exit_intr_error_code,
KVM_ISA_VMX);
+ switch ((u16)vmcs12->vm_exit_reason) {
+ case EXIT_REASON_MSR_READ:
+ case EXIT_REASON_MSR_WRITE:
+ trace_kvm_nested_msr(vmcs12->vm_exit_reason == EXIT_REASON_MSR_WRITE,
+ kvm_rcx_read(vcpu),
+ (kvm_rax_read(vcpu) & -1u) |
+ (((u64)(kvm_rdx_read(vcpu) & -1u) << 32)));
+ break;
+ case EXIT_REASON_VMCALL:
+ trace_kvm_nested_hypercall(kvm_rax_read(vcpu),
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+
+ }
+
load_vmcs12_host_state(vcpu, vmcs12);
return;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9dd13f52d4999c..05fadcd38fde75 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5773,6 +5773,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
vcpu->arch.exit_qualification = exit_qualification;
+ vcpu->arch.ept_fault_error_code = error_code;
/*
* Check that the GPA doesn't exceed physical memory limits, as that is
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dfb7d25ed94f26..766d0dc333eac3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13637,6 +13637,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_hypercall);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
--
2.26.3
* Re: [PATCH v2 0/4] KVM: x86: tracepoint updates
From: Sean Christopherson @ 2023-09-26 0:03 UTC
To: Maxim Levitsky
Cc: kvm, Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Paolo Bonzini, Ingo Molnar, Thomas Gleixner, Borislav Petkov
On Sun, Sep 24, 2023, Maxim Levitsky wrote:
> This patch series adds selected information to the KVM tracepoints
> to make it easier to gather insights about running nested guests.
>
> This patch series was developed together with a new x86 performance analysis tool
> that I wrote recently (https://gitlab.com/maximlevitsky/kvmon),
> which aims to be a better kvm_stat and lets you see at a glance
> what is happening in a VM, including nesting.
Rather than add more and more tracepoints, I think we should be more thoughtful
about (a) where we place KVM's tracepoints and (b) giving userspace the necessary
hooks to write BPF programs to extract whatever data is needed at any given time.
There's simply no way we can iterate fast enough in KVM tracepoints to adapt to
userspace's debug/monitoring needs. E.g. if it turns out someone wants detailed
info on hypercalls that use memory or registers beyond ABCD, the new tracepoints
won't help them.
If all KVM tracepoints grab "struct kvm_vcpu" and force VMCS "registers" to be
cached (or decached depending on one's viewpoint), then I think that'll serve 99%
of usecases. E.g. the vCPU gives a BPF program kvm_vcpu, vcpu_{vmx,svm}, kvm, etc.
trace_kvm_exit is a good example, where despite all of the information that is captured
by KVM, it's borderline worthless for CPUID and MSR exits because their interesting
information is held in registers and not captured in the VMCS or VMCB.
There are some BTF type info issues that I've encountered, but I suspect that's
as much a PEBKAC problem as anything.
* Re: [PATCH v2 0/4] KVM: x86: tracepoint updates
From: Maxim Levitsky @ 2023-09-26 8:28 UTC
To: Sean Christopherson
Cc: kvm, Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Paolo Bonzini, Ingo Molnar, Thomas Gleixner, Borislav Petkov
On Mon, 2023-09-25 at 17:03 -0700, Sean Christopherson wrote:
> On Sun, Sep 24, 2023, Maxim Levitsky wrote:
> > This patch series adds selected information to the KVM tracepoints
> > to make it easier to gather insights about running nested guests.
> >
> > This patch series was developed together with a new x86 performance analysis tool
> > that I wrote recently (https://gitlab.com/maximlevitsky/kvmon),
> > which aims to be a better kvm_stat and lets you see at a glance
> > what is happening in a VM, including nesting.
>
> Rather than add more and more tracepoints, I think we should be more thoughtful
> about (a) where we place KVM's tracepoints and (b) giving userspace the necessary
> hooks to write BPF programs to extract whatever data is needed at any given time.
>
> There's simply no way we can iterate fast enough in KVM tracepoints to adapt to
> userspace's debug/monitoring needs. E.g. if it turns out someone wants detailed
> info on hypercalls that use memory or registers beyond ABCD, the new tracepoints
> won't help them.
>
> If all KVM tracepoints grab "struct kvm_vcpu" and force VMCS "registers" to be
> cached (or decached depending on one's viewpoint), then I think that'll serve 99%
> of usecases. E.g. the vCPU gives a BPF program kvm_vcpu, vcpu_{vmx,svm}, kvm, etc.
>
> trace_kvm_exit is a good example, where despite all of the information that is captured
> by KVM, it's borderline worthless for CPUID and MSR exits because their interesting
> information is held in registers and not captured in the VMCS or VMCB.
>
> There are some BTF type info issues that I've encountered, but I suspect that's
> as much a PEBKAC problem as anything.
>
While eBPF has its use cases, none of the extra tracepoints were added solely because of
the monitoring tool, and I do understand that tracepoints are a limited resource.

Each tracepoint/info was added only when it was also found to be useful for regular
KVM tracing.
Best regards,
Maxim Levitsky
* Re: [PATCH v2 0/4] KVM: x86: tracepoint updates
From: Paolo Bonzini @ 2023-09-26 16:36 UTC
To: Maxim Levitsky, Sean Christopherson
Cc: kvm, Dave Hansen, x86, linux-kernel, H. Peter Anvin, Ingo Molnar,
Thomas Gleixner, Borislav Petkov
On 9/26/23 10:28, Maxim Levitsky wrote:
> > trace_kvm_exit is a good example, where despite all of the information that is captured
>> by KVM, it's borderline worthless for CPUID and MSR exits because their interesting
>> information is held in registers and not captured in the VMCS or VMCB.
>>
> > There are some BTF type info issues that I've encountered, but I suspect that's
>> as much a PEBKAC problem as anything.
>>
> While eBPF has its use cases, none of the extra tracepoints were added solely because of
> the monitoring tool and I do understand that tracepoints are a limited resource.
>
> Each added tracepoint/info was added only when it was also found to be useful for regular
> kvm tracing.
I am not sure about _all_ of them, but I agree with both of you.
On one hand, it would be pretty cool to have eBPF access to registers.
On the other hand, the specific info you're adding is generic and I
think there are only a couple exceptions where I am not sure it belongs
in the generic KVM tracepoints.
Paolo
* Re: [PATCH v2 1/4] KVM: x86: refactor req_immediate_exit logic
From: Paolo Bonzini @ 2023-09-26 16:37 UTC
To: Maxim Levitsky, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On 9/24/23 14:44, Maxim Levitsky wrote:
> + if (vcpu->arch.req_immediate_exit)
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> - static_call(kvm_x86_request_immediate_exit)(vcpu);
> - }
Is it enough for your use case to add a new tracepoint here, instead of
adding req_immediate_exit to both entry and exit?
Paolo
* Re: [PATCH v2 2/4] KVM: x86: add more information to the kvm_entry tracepoint
From: Paolo Bonzini @ 2023-09-26 16:39 UTC
To: Maxim Levitsky, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On 9/24/23 14:44, Maxim Levitsky wrote:
> + __field( u32, inj_info )
> + __field( u32, inj_info_err )
> + __field( bool, guest_mode )
> + __field( bool, req_imm_exit )
> + ),
As anticipated in patch 1, I'm not so sure about adding req_imm_exit here,
and especially not in kvm_exit. I do believe it should be traced;
I'm just not sure it's needed in kvm_entry+kvm_exit as opposed to just a
separate tracepoint.
Paolo
* Re: [PATCH v2 3/4] KVM: x86: add more information to kvm_exit tracepoint
From: Paolo Bonzini @ 2023-09-26 16:40 UTC
To: Maxim Levitsky, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On 9/24/23 14:44, Maxim Levitsky wrote:
> + __field( bool, guest_mode ) \
> + __field( u64, requests ) \
> + __field( bool, req_imm_exit ) \
I am not sure about adding guest_mode or req_imm_exit here, because they
should always match kvm_entry.
Paolo
* Re: [PATCH v2 4/4] KVM: x86: add new nested vmexit tracepoints
From: Paolo Bonzini @ 2023-09-26 16:50 UTC
To: Maxim Levitsky, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On 9/24/23 14:44, Maxim Levitsky wrote:
> + trace_kvm_nested_page_fault(fault->address,
> + vcpu->arch.ept_fault_error_code,
Any reason to include vcpu->arch.ept_fault_error_code rather than the
injected exit qualification?
Paolo
> + fault->error_code);
* Re: [PATCH v2 0/4] KVM: x86: tracepoint updates
From: Sean Christopherson @ 2023-09-26 20:45 UTC
To: Paolo Bonzini
Cc: Maxim Levitsky, kvm, Dave Hansen, x86, linux-kernel,
H. Peter Anvin, Ingo Molnar, Thomas Gleixner, Borislav Petkov
On Tue, Sep 26, 2023, Paolo Bonzini wrote:
> On 9/26/23 10:28, Maxim Levitsky wrote:
> > > trace_kvm_exit is a good example, where despite all of the information that is captured
> > > by KVM, it's borderline worthless for CPUID and MSR exits because their interesting
> > > information is held in registers and not captured in the VMCS or VMCB.
> > >
> > > There are some BTF type info issues that I've encountered, but I suspect that's
> > > as much a PEBKAC problem as anything.
> > >
> > While eBPF has its use cases, none of the extra tracepoints were added solely because of
> > the monitoring tool, and I do understand that tracepoints are a limited resource.
> >
> > Each tracepoint/info was added only when it was also found to be useful for regular
> > KVM tracing.
>
> I am not sure about _all_ of them, but I agree with both of you.
>
> On one hand, it would be pretty cool to have eBPF access to registers. On
> the other hand, the specific info you're adding is generic and I think there
> are only a couple exceptions where I am not sure it belongs in the generic
> KVM tracepoints.
I'm not saying this information isn't useful, *sometimes*. What I'm saying is
that I don't think it's sustainable/maintainable to keep expanding KVM's tracepoints.
E.g. why trace req_immediate_exit and not mmu_invalidate_seq[*]?
It's practically impossible to predict exactly what information will be useful in
the field. And for something like kvmon, IMO the onus is on userspace to do the
heavy lifting.
Rather than hardcode mounds of information in KVM's tracepoints, I think we should
refactor KVM to make it as easy as possible to use BPF to piggyback tracepoints
and get at data that *might* be interesting, and then add a variety of BPF programs
(e.g. using bpftrace for simplicity) somewhere in tools/ to provide equivalent
functionality to select existing tracepoints.
E.g. if we rework kvm_vcpu to place "struct kvm_vcpu_arch arch" at offset '0',
then we can get at all the GPRs and pseudo-registers by hardcoding offsets into the
struct, i.e. without needing BTF type info. More elaborate trace programs would
likely need BTF, or maybe some clever shenanigans, but it seems very doable.
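A minimal sketch of that layout idea (hypothetical, not the actual kernel
definition):

	struct kvm_vcpu {
		struct kvm_vcpu_arch arch;	/*
						 * At offset 0, so BPF programs can
						 * reach GPRs and pseudo-registers
						 * via fixed offsets, without BTF.
						 */
		struct kvm *kvm;
		/* ... rest of the struct unchanged ... */
	};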
[*] https://lore.kernel.org/all/ZOaUdP46f8XjQvio@google.com
* Re: [PATCH v2 1/4] KVM: x86: refactor req_immediate_exit logic
From: Maxim Levitsky @ 2023-09-28 10:41 UTC
To: Paolo Bonzini, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
> On 9/24/23 14:44, Maxim Levitsky wrote:
> > + if (vcpu->arch.req_immediate_exit)
> > kvm_make_request(KVM_REQ_EVENT, vcpu);
> > - static_call(kvm_x86_request_immediate_exit)(vcpu);
> > - }
>
> Is it enough for your use case to add a new tracepoint here, instead of
> adding req_immediate_exit to both entry and exit?
If possible, I prefer to keep the req_immediate_exit field on entry only,
as it is pretty much an extension of the injected event data that I also
added to the kvm_entry() tracepoint.
Best regards,
Maxim Levitsky
>
> Paolo
>
* Re: [PATCH v2 2/4] KVM: x86: add more information to the kvm_entry tracepoint
From: Maxim Levitsky @ 2023-09-28 10:41 UTC
To: Paolo Bonzini, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On Tue, 2023-09-26 at 18:39 +0200, Paolo Bonzini wrote:
> On 9/24/23 14:44, Maxim Levitsky wrote:
> > + __field( u32, inj_info )
> > + __field( u32, inj_info_err )
> > + __field( bool, guest_mode )
> > + __field( bool, req_imm_exit )
> > + ),
>
> As anticipated in patch 1, I'm not so sure about adding req_imm_exit here,
> and especially not in kvm_exit. I do believe it should be traced;
> I'm just not sure it's needed in kvm_entry+kvm_exit as opposed to just a
> separate tracepoint.
I will drop guest_mode.
Best regards,
Maxim Levitsky
>
> Paolo
>
* Re: [PATCH v2 3/4] KVM: x86: add more information to kvm_exit tracepoint
From: Maxim Levitsky @ 2023-09-28 10:41 UTC
To: Paolo Bonzini, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On Tue, 2023-09-26 at 18:40 +0200, Paolo Bonzini wrote:
> On 9/24/23 14:44, Maxim Levitsky wrote:
> > + __field( bool, guest_mode ) \
> > + __field( u64, requests ) \
> > + __field( bool, req_imm_exit ) \
>
> I am not sure about adding guest_mode or req_imm_exit here, because they
> should always match kvm_entry.
>
> Paolo
>
I'll drop both.
Best regards,
Maxim Levitsky
* Re: [PATCH v2 4/4] KVM: x86: add new nested vmexit tracepoints
From: Maxim Levitsky @ 2023-09-28 10:42 UTC
To: Paolo Bonzini, kvm
Cc: Dave Hansen, x86, linux-kernel, H. Peter Anvin,
Sean Christopherson, Ingo Molnar, Thomas Gleixner,
Borislav Petkov
On Tue, 2023-09-26 at 18:50 +0200, Paolo Bonzini wrote:
> On 9/24/23 14:44, Maxim Levitsky wrote:
> > + trace_kvm_nested_page_fault(fault->address,
> > + vcpu->arch.ept_fault_error_code,
>
> Any reason to include vcpu->arch.ept_fault_error_code rather than the
> injected exit qualification?
vcpu->arch.ept_fault_error_code is the original exit qualification,
captured into a new field on EPT fault, which I want to trace as the
host error code. But I see now that I can use vmx_get_exit_qual()
to retrieve it again, avoiding the overhead.

Also, for the injected error code I can use 'exit_qualification' instead;
I think I intended to do so but forgot to update the patch.

So something like this works fine for me:

	trace_kvm_nested_page_fault(fault->address,
				    vmx_get_exit_qual(vcpu),
				    exit_qualification);
Best regards,
Maxim Levitsky
>
> Paolo
>
> > + fault->error_code);