* [PATCH v5 1/3] KVM: x86: add more information to the kvm_entry tracepoint
2024-09-10 20:03 [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
@ 2024-09-10 20:03 ` Maxim Levitsky
2024-12-18 20:53 ` Sean Christopherson
2024-09-10 20:03 ` [PATCH v5 2/3] KVM: x86: add information about pending requests to kvm_exit tracepoint Maxim Levitsky
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Maxim Levitsky @ 2024-09-10 20:03 UTC (permalink / raw)
To: kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, Sean Christopherson, H. Peter Anvin, linux-kernel,
Maxim Levitsky
Add VMX/SVM specific interrupt injection info to vm entry tracepoint.
Also add a flag showing that immediate vm exit is set to happen after
the entry.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 5 ++++-
arch/x86/kvm/svm/svm.c | 17 +++++++++++++++++
arch/x86/kvm/trace.h | 17 ++++++++++++++---
arch/x86/kvm/vmx/main.c | 1 +
arch/x86/kvm/vmx/vmx.c | 11 +++++++++++
arch/x86/kvm/vmx/x86_ops.h | 4 ++++
7 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 03b7e13f15bbd..af5c4d55d47bc 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52443ccda320f..8118f75a8a35d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1756,13 +1756,16 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
u32 *exit_int_info, u32 *exit_int_info_err_code);
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *inj_info, u32 *inj_info_error_code);
+
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a0506ef87dfb..485f3c2826312 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3525,6 +3525,22 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
+static void svm_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ *inj_info = control->event_inj;
+
+ if ((*inj_info & SVM_EXITINTINFO_VALID) &&
+ (*inj_info & SVM_EXITINTINFO_VALID_ERR))
+ *inj_info_error_code = control->event_inj_err;
+ else
+ *inj_info_error_code = 0;
+
+}
+
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5057,6 +5073,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
+ .get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index d3aeffd6ae753..b4c014b1aa9a1 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -22,16 +22,27 @@ TRACE_EVENT(kvm_entry,
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
__field( bool, immediate_exit )
+ __field( u32, inj_info )
+ __field( u32, inj_info_err )
+ __field( bool, guest_mode )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
- __entry->immediate_exit = force_immediate_exit;
+ __entry->immediate_exit = force_immediate_exit;
+ __entry->guest_mode = is_guest_mode(vcpu);
+
+ static_call(kvm_x86_get_entry_info)(vcpu,
+ &__entry->inj_info,
+ &__entry->inj_info_err);
),
- TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
- __entry->immediate_exit ? "[immediate exit]" : "")
+ TP_printk("vcpu %u, rip 0x%lx inj 0x%08x inj_error_code 0x%08x%s%s",
+ __entry->vcpu_id, __entry->rip,
+ __entry->inj_info, __entry->inj_info_err,
+ __entry->immediate_exit ? "[immediate exit]" : "",
+ __entry->guest_mode ? "[guest]" : "")
);
/*
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 7e2e78a142574..769a7ca06f59b 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -108,6 +108,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
+ .get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 89682832dded7..af18174cc4a20 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6180,6 +6180,17 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
+void vmx_get_entry_info(struct kvm_vcpu *vcpu,
+ u32 *inj_info,
+ u32 *inj_info_error_code)
+{
+ *inj_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if (is_exception_with_error_code(*inj_info))
+ *inj_info_error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ else
+ *inj_info_error_code = 0;
+}
+
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index b6a7cfc6ae317..124425997ec15 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -104,8 +104,12 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+
void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
+void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *inj_info,
+ u32 *inj_info_error_code);
+
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
--
2.26.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v5 1/3] KVM: x86: add more information to the kvm_entry tracepoint
2024-09-10 20:03 ` [PATCH v5 1/3] KVM: x86: add more information to the kvm_entry tracepoint Maxim Levitsky
@ 2024-12-18 20:53 ` Sean Christopherson
0 siblings, 0 replies; 13+ messages in thread
From: Sean Christopherson @ 2024-12-18 20:53 UTC (permalink / raw)
To: Maxim Levitsky
Cc: kvm, x86, Dave Hansen, Thomas Gleixner, Borislav Petkov,
Paolo Bonzini, Ingo Molnar, H. Peter Anvin, linux-kernel
On Tue, Sep 10, 2024, Maxim Levitsky wrote:
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 52443ccda320f..8118f75a8a35d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1756,13 +1756,16 @@ struct kvm_x86_ops {
> void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
>
> /*
> - * Retrieve somewhat arbitrary exit information. Intended to
> + * Retrieve somewhat arbitrary exit/entry information. Intended to
> * be used only from within tracepoints or error paths.
> */
> void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
> u64 *info1, u64 *info2,
> u32 *exit_int_info, u32 *exit_int_info_err_code);
>
> + void (*get_entry_info)(struct kvm_vcpu *vcpu,
> + u32 *inj_info, u32 *inj_info_error_code);
I vote to use the same names as the kvm_exit tracepoint, i.e. intr_into and
error_code throughout. While I agree that capturing the "injection" aspect is
nice to have, if a user doesn't know that the fields are related to event/intr
injection, I don't think "inj" is going to help them connect the dots.
On the other, for cases where an event is re-injected, using the same names as
kvm_exit provides a direct connection between the event that was being vectored
at the time of exit, and the subsequent re-injection of the same event.
> int (*check_intercept)(struct kvm_vcpu *vcpu,
> struct x86_instruction_info *info,
> enum x86_intercept_stage stage,
...
> TP_fast_assign(
> __entry->vcpu_id = vcpu->vcpu_id;
> __entry->rip = kvm_rip_read(vcpu);
> - __entry->immediate_exit = force_immediate_exit;
> + __entry->immediate_exit = force_immediate_exit;
> + __entry->guest_mode = is_guest_mode(vcpu);
> +
> + static_call(kvm_x86_get_entry_info)(vcpu,
> + &__entry->inj_info,
> + &__entry->inj_info_err);
> ),
>
> - TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
> - __entry->immediate_exit ? "[immediate exit]" : "")
> + TP_printk("vcpu %u, rip 0x%lx inj 0x%08x inj_error_code 0x%08x%s%s",
> + __entry->vcpu_id, __entry->rip,
> + __entry->inj_info, __entry->inj_info_err,
> + __entry->immediate_exit ? "[immediate exit]" : "",
> + __entry->guest_mode ? "[guest]" : "")
I 100% agree kvm_entry should capture L1 vs. L2, but looking more closely, I
think we should capture it in both the entry and exit tracepoints, and then
maybe rename trace_kvm_nested_vmexit_inject() => trace_kvm_nested_vmexit().
Currently, trace_kvm_nested_vmexit() traces all exits from L2=>L0, which is rather
silly since it's trivial to capture L1 vs. L2 in kvm_exit. I also find it to be
quite annoying since the vast, vast majority of time I don't want to trace *just*
L2=>L0 exits. And it's especially annoying because if I want to see both L1 and
L2 exit, the trace contains a double dose of L2 exits.
Last thought, what about always capturing where the transition is occurring? E.g.
instead of tagging on "[guest]" at the end, something like this:
TP_printk("vcpu %u => L%u rip 0x%lx intr_info 0x%08x error_code 0x%08x%s",
__entry->vcpu_id, 1 + __entry->guest_mode,
...
and then in kvm_exit:
TP_printk("vcpu %u <= L%u reason %s%s%s rip 0x%lx info1 0x%016llx " \
"info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
"requests 0x%016llx", \
__entry->vcpu_id, 1 + __entry->guest_mode, \
Or use "to" and "from" if the "=>" / "<=" is too cute and confusing.
For now, I'm going to omit the is_guest_mode() change purely to avoid churn if
we end up squashing the current trace_kvm_nested_vmexit() into trace_kvm_exit().
As I'm about to disappear for two weeks, I'm going to speculatively apply the
below so I don't delay the meat of this patch any more than I already have.
Please holler if you disagree with the intr_info+error_code terminology, I'm
definitely open to other other names, though I do feel quite strongly that entry
and exit need to be consistent. These are sitting at the head of "misc", so I
can fixup without much fuss.
---
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 10 Sep 2024 16:03:48 -0400
Subject: [PATCH 1/2] KVM: x86: Add interrupt injection information to the
kvm_entry tracepoint
Add VMX/SVM specific interrupt injection info to the kvm_entry tracepoint.
As is done with kvm_exit, gather the information via a kvm_x86_ops hook
to avoid the moderately costly VMREADs on VMX when the tracepoint isn't
enabled.
Opportunistically rename the parameters in the get_exit_info()
declaration to match the names used by both SVM and VMX.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20240910200350.264245-2-mlevitsk@redhat.com
[sean: drop is_guest_mode() change, use intr_info/error_code for names]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 7 +++++--
arch/x86/kvm/svm/svm.c | 16 ++++++++++++++++
arch/x86/kvm/trace.h | 9 ++++++++-
arch/x86/kvm/vmx/main.c | 1 +
arch/x86/kvm/vmx/vmx.c | 9 +++++++++
arch/x86/kvm/vmx/x86_ops.h | 3 +++
7 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5aff7222e40f..8c04472829a0 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -100,6 +100,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1a09ac99132c..c07d8318e9d8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1770,12 +1770,15 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
- u32 *exit_int_info, u32 *exit_int_info_err_code);
+ u32 *intr_info, u32 *error_code);
+
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8fc2f4a97495..d06fe41a2de0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3542,6 +3542,21 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
+static void svm_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info,
+ u32 *error_code)
+{
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ *intr_info = control->event_inj;
+
+ if ((*intr_info & SVM_EXITINTINFO_VALID) &&
+ (*intr_info & SVM_EXITINTINFO_VALID_ERR))
+ *error_code = control->event_inj_err;
+ else
+ *error_code = 0;
+
+}
+
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5082,6 +5097,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
+ .get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index d3aeffd6ae75..c2edf4a36fad 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -22,15 +22,22 @@ TRACE_EVENT(kvm_entry,
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
__field( bool, immediate_exit )
+ __field( u32, intr_info )
+ __field( u32, error_code )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
__entry->immediate_exit = force_immediate_exit;
+
+ kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info,
+ &__entry->error_code);
),
- TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+ TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s",
+ __entry->vcpu_id, __entry->rip,
+ __entry->intr_info, __entry->error_code,
__entry->immediate_exit ? "[immediate exit]" : "")
);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 92d35cc6cd15..697e135ba0f3 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -111,6 +111,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
+ .get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e93c48ff61c5..3fd6df782492 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6194,6 +6194,15 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
+void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
+{
+ *intr_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if (is_exception_with_error_code(*intr_info))
+ *error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ else
+ *error_code = 0;
+}
+
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index a55981c5216e..f7f65e81920b 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -104,8 +104,11 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+
void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
+void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code);
+
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
base-commit: 43651b98dd23e3d2d11f14964e98801ba58feccb
--
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 2/3] KVM: x86: add information about pending requests to kvm_exit tracepoint
2024-09-10 20:03 [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
2024-09-10 20:03 ` [PATCH v5 1/3] KVM: x86: add more information to the kvm_entry tracepoint Maxim Levitsky
@ 2024-09-10 20:03 ` Maxim Levitsky
2024-09-10 20:03 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
` (2 subsequent siblings)
4 siblings, 0 replies; 13+ messages in thread
From: Maxim Levitsky @ 2024-09-10 20:03 UTC (permalink / raw)
To: kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, Sean Christopherson, H. Peter Anvin, linux-kernel,
Maxim Levitsky
This makes it possible to gather information on how often KVM interrupts vCPUs
due to specific requests.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/trace.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b4c014b1aa9a1..5a5b7757e8456 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -319,12 +319,14 @@ TRACE_EVENT(name, \
__field( u32, intr_info ) \
__field( u32, error_code ) \
__field( unsigned int, vcpu_id ) \
+ __field( u64, requests ) \
), \
\
TP_fast_assign( \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
+ __entry->requests = READ_ONCE(vcpu->requests); \
kvm_x86_call(get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
@@ -334,11 +336,13 @@ TRACE_EVENT(name, \
), \
\
TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
- "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
+ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
+ "requests 0x%016llx", \
__entry->vcpu_id, \
kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
__entry->guest_rip, __entry->info1, __entry->info2, \
- __entry->intr_info, __entry->error_code) \
+ __entry->intr_info, __entry->error_code, \
+ __entry->requests) \
)
/*
--
2.26.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-09-10 20:03 [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
2024-09-10 20:03 ` [PATCH v5 1/3] KVM: x86: add more information to the kvm_entry tracepoint Maxim Levitsky
2024-09-10 20:03 ` [PATCH v5 2/3] KVM: x86: add information about pending requests to kvm_exit tracepoint Maxim Levitsky
@ 2024-09-10 20:03 ` Maxim Levitsky
2024-12-18 21:14 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Sean Christopherson
2024-12-19 17:33 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Paolo Bonzini
2024-10-30 21:21 ` [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
2024-12-19 2:40 ` Sean Christopherson
4 siblings, 2 replies; 13+ messages in thread
From: Maxim Levitsky @ 2024-09-10 20:03 UTC (permalink / raw)
To: kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, Sean Christopherson, H. Peter Anvin, linux-kernel,
Maxim Levitsky
Add 3 new tracepoints for nested VM exits which are intended
to capture extra information to gain insights about the nested guest
behavior.
The new tracepoints are:
- kvm_nested_msr
- kvm_nested_hypercall
These tracepoints capture extra register state to be able to know
which MSR or which hypercall was done.
- kvm_nested_page_fault
This tracepoint allows capturing extra info about which host page fault
error code caused the nested page fault.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/svm/nested.c | 22 +++++++++++
arch/x86/kvm/trace.h | 82 +++++++++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
arch/x86/kvm/x86.c | 3 ++
4 files changed, 131 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 6f704c1037e51..2020307481553 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
+ u64 host_error_code = vmcb->control.exit_info_1;
+
if (vmcb->control.exit_code != SVM_EXIT_NPF) {
/*
@@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
vmcb->control.exit_code_hi = 0;
vmcb->control.exit_info_1 = (1ULL << 32);
vmcb->control.exit_info_2 = fault->address;
+ host_error_code = 0;
}
vmcb->control.exit_info_1 &= ~0xffffffffULL;
vmcb->control.exit_info_1 |= fault->error_code;
+ trace_kvm_nested_page_fault(fault->address, host_error_code,
+ fault->error_code);
+
nested_svm_vmexit(svm);
}
@@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.exit_int_info_err,
KVM_ISA_SVM);
+ /* Collect some info about nested VM exits */
+ switch (vmcb12->control.exit_code) {
+ case SVM_EXIT_MSR:
+ trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
+ kvm_rcx_read(vcpu),
+ (vmcb12->save.rax & 0xFFFFFFFFull) |
+ (((u64)kvm_rdx_read(vcpu) << 32)));
+ break;
+ case SVM_EXIT_VMMCALL:
+ trace_kvm_nested_hypercall(vmcb12->save.rax,
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+ }
+
kvm_vcpu_unmap(vcpu, &map, true);
nested_svm_transition_tlb_flush(vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 5a5b7757e8456..6074b4f85d5e2 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -613,7 +613,7 @@ TRACE_EVENT(kvm_pv_eoi,
);
/*
- * Tracepoint for nested VMRUN
+ * Tracepoint for nested VMRUN/VMENTER
*/
TRACE_EVENT(kvm_nested_vmenter,
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
@@ -746,8 +746,84 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip: 0x%016llx", __entry->rip)
);
+
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for nested guest MSR access.
+ */
+TRACE_EVENT(kvm_nested_msr,
+ TP_PROTO(bool write, u32 ecx, u64 data),
+ TP_ARGS(write, ecx, data),
+
+ TP_STRUCT__entry(
+ __field( bool, write )
+ __field( u32, ecx )
+ __field( u64, data )
+ ),
+
+ TP_fast_assign(
+ __entry->write = write;
+ __entry->ecx = ecx;
+ __entry->data = data;
+ ),
+
+ TP_printk("msr_%s %x = 0x%llx",
+ __entry->write ? "write" : "read",
+ __entry->ecx, __entry->data)
+);
+
+/*
+ * Tracepoint for nested hypercalls, capturing generic info about the
+ * hypercall
+ */
+
+TRACE_EVENT(kvm_nested_hypercall,
+ TP_PROTO(u64 rax, u64 rbx, u64 rcx, u64 rdx),
+ TP_ARGS(rax, rbx, rcx, rdx),
+
+ TP_STRUCT__entry(
+ __field( u64, rax )
+ __field( u64, rbx )
+ __field( u64, rcx )
+ __field( u64, rdx )
+ ),
+
+ TP_fast_assign(
+ __entry->rax = rax;
+ __entry->rbx = rbx;
+ __entry->rcx = rcx;
+ __entry->rdx = rdx;
+ ),
+
+ TP_printk("rax 0x%llx rbx 0x%llx rcx 0x%llx rdx 0x%llx",
+ __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+
+TRACE_EVENT(kvm_nested_page_fault,
+ TP_PROTO(u64 gpa, u64 host_error_code, u64 guest_error_code),
+ TP_ARGS(gpa, host_error_code, guest_error_code),
+
+ TP_STRUCT__entry(
+ __field( u64, gpa )
+ __field( u64, host_error_code )
+ __field( u64, guest_error_code )
+ ),
+
+ TP_fast_assign(
+ __entry->gpa = gpa;
+ __entry->host_error_code = host_error_code;
+ __entry->guest_error_code = guest_error_code;
+ ),
+
+ TP_printk("gpa 0x%llx host err 0x%llx guest err 0x%llx",
+ __entry->gpa,
+ __entry->host_error_code,
+ __entry->guest_error_code)
+);
+
+
+/*
+ * Tracepoint for invlpga
*/
TRACE_EVENT(kvm_invlpga,
TP_PROTO(__u64 rip, unsigned int asid, u64 address),
@@ -770,7 +846,7 @@ TRACE_EVENT(kvm_invlpga,
);
/*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for skinit
*/
TRACE_EVENT(kvm_skinit,
TP_PROTO(__u64 rip, __u32 slb),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2392a7ef254df..3881a02694fc2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -454,6 +454,16 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
*/
nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
fault->address);
+
+ /*
+ * vmx_get_exit_qual() returns the original exit qualification,
+ * before it was overridden with exit qualification that
+ * is about to be injected to the guest.
+ */
+
+ trace_kvm_nested_page_fault(fault->address,
+ vmx_get_exit_qual(vcpu),
+ exit_qualification);
}
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
@@ -4985,6 +4995,23 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmcs12->vm_exit_intr_error_code,
KVM_ISA_VMX);
+ switch ((u16)vmcs12->vm_exit_reason) {
+ case EXIT_REASON_MSR_READ:
+ case EXIT_REASON_MSR_WRITE:
+ trace_kvm_nested_msr(vmcs12->vm_exit_reason == EXIT_REASON_MSR_WRITE,
+ kvm_rcx_read(vcpu),
+ (kvm_rax_read(vcpu) & 0xFFFFFFFFull) |
+ (((u64)kvm_rdx_read(vcpu)) << 32));
+ break;
+ case EXIT_REASON_VMCALL:
+ trace_kvm_nested_hypercall(kvm_rax_read(vcpu),
+ kvm_rbx_read(vcpu),
+ kvm_rcx_read(vcpu),
+ kvm_rdx_read(vcpu));
+ break;
+
+ }
+
load_vmcs12_host_state(vcpu, vmcs12);
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f72e5d89e942d..cb01cf2ad6ac9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -14032,6 +14032,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_hypercall);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
--
2.26.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-09-10 20:03 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
@ 2024-12-18 21:14 ` Sean Christopherson
2024-12-19 17:33 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Paolo Bonzini
1 sibling, 0 replies; 13+ messages in thread
From: Sean Christopherson @ 2024-12-18 21:14 UTC (permalink / raw)
To: Maxim Levitsky
Cc: kvm, x86, Dave Hansen, Thomas Gleixner, Borislav Petkov,
Paolo Bonzini, Ingo Molnar, H. Peter Anvin, linux-kernel
On Tue, Sep 10, 2024, Maxim Levitsky wrote:
> Add 3 new tracepoints for nested VM exits which are intended
> to capture extra information to gain insights about the nested guest
> behavior.
>
> The new tracepoints are:
>
> - kvm_nested_msr
> - kvm_nested_hypercall
I 100% agree that not having register state in the exit tracepoints is obnoxious,
but I don't think we should add one-off tracepoints for the most annoying cases.
I would much prefer to figure out a way to capture register state in kvm_entry
and kvm_exit. E.g. I've lost track of the number of times I've observed an MSR
exit without having trace_kvm_msr enabled.
One idea would be to capture E{A,B,C,D}X, which would cover MSRs, CPUID, and
most hypercalls. And then we might even be able to drop the dedicated MSR and
CPUID tracepoints (not sure if that's a good idea).
Side topic, arch/s390/kvm/trace.h has the concept of COMMON information that is
captured for multiple tracepoints. I haven't looked closely, but I gotta imagine
we can/should use a similar approach for x86.
> These tracepoints capture extra register state to be able to know
> which MSR or which hypercall was done.
>
> - kvm_nested_page_fault
>
> This tracepoint allows to capture extra info about which host pagefault
> error code caused the nested page fault.
The host error code, a.k.a. qualification info, is readily available in the
kvm_exit (or nested variant) tracepoint. I don't think letting userspace skip a
tracepoint that's probably already enabled is worth the extra code to support
this tracepoint. The nested_svm_inject_npf_exit() code in particular is wonky,
and I think it's a good example of why userspace "needs" trace_kvm_exit, e.g. to
observe that a nested stage-2 page fault didn't originate from a hardware stage-2
fault.
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-09-10 20:03 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
2024-12-18 21:14 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Sean Christopherson
@ 2024-12-19 17:33 ` Paolo Bonzini
2024-12-19 17:49 ` Maxim Levitsky
1 sibling, 1 reply; 13+ messages in thread
From: Paolo Bonzini @ 2024-12-19 17:33 UTC (permalink / raw)
To: Maxim Levitsky, kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
Sean Christopherson, H. Peter Anvin, linux-kernel
On 9/10/24 22:03, Maxim Levitsky wrote:
> Add 3 new tracepoints for nested VM exits which are intended
> to capture extra information to gain insights about the nested guest
> behavior.
>
> The new tracepoints are:
>
> - kvm_nested_msr
> - kvm_nested_hypercall
>
> These tracepoints capture extra register state to be able to know
> which MSR or which hypercall was done.
>
> - kvm_nested_page_fault
>
> This tracepoint allows to capture extra info about which host pagefault
> error code caused the nested page fault.
>
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
> arch/x86/kvm/svm/nested.c | 22 +++++++++++
> arch/x86/kvm/trace.h | 82 +++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
> arch/x86/kvm/x86.c | 3 ++
> 4 files changed, 131 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 6f704c1037e51..2020307481553 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> struct vmcb *vmcb = svm->vmcb;
> + u64 host_error_code = vmcb->control.exit_info_1;
> +
>
> if (vmcb->control.exit_code != SVM_EXIT_NPF) {
> /*
> @@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> vmcb->control.exit_code_hi = 0;
> vmcb->control.exit_info_1 = (1ULL << 32);
> vmcb->control.exit_info_2 = fault->address;
> + host_error_code = 0;
> }
>
> vmcb->control.exit_info_1 &= ~0xffffffffULL;
> vmcb->control.exit_info_1 |= fault->error_code;
>
> + trace_kvm_nested_page_fault(fault->address, host_error_code,
> + fault->error_code);
> +
I disagree with Sean about trace_kvm_nested_page_fault. It's a useful
addition and it is easier to understand what's happening with a
dedicated tracepoint (especially on VMX).
Tracepoints are not an exact science and they aren't entirely kernel API.
At least they can just go away at any time (changing them is a lot
more tricky, but their presence is not guaranteed). The one below has
the slight ugliness of having to do some computation in
nested_svm_vmexit(), this one should go in.
> nested_svm_vmexit(svm);
> }
>
> @@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> vmcb12->control.exit_int_info_err,
> KVM_ISA_SVM);
>
> + /* Collect some info about nested VM exits */
> + switch (vmcb12->control.exit_code) {
> + case SVM_EXIT_MSR:
> + trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
> + kvm_rcx_read(vcpu),
> + (vmcb12->save.rax & 0xFFFFFFFFull) |
> + (((u64)kvm_rdx_read(vcpu) << 32)));
> + break;
> + case SVM_EXIT_VMMCALL:
> + trace_kvm_nested_hypercall(vmcb12->save.rax,
> + kvm_rbx_read(vcpu),
> + kvm_rcx_read(vcpu),
> + kvm_rdx_read(vcpu));
> + break;
Here I probably would have preferred an unconditional tracepoint giving
RAX/RBX/RCX/RDX after a nested vmexit. This is not exactly what Sean
wanted but perhaps it strikes a middle ground? I know you wrote this
for a debugging tool, do you really need to have everything in a single
tracepoint, or can you correlate the existing exit tracepoint with this
hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
Paolo
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-12-19 17:33 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Paolo Bonzini
@ 2024-12-19 17:49 ` Maxim Levitsky
2024-12-19 18:02 ` Paolo Bonzini
0 siblings, 1 reply; 13+ messages in thread
From: Maxim Levitsky @ 2024-12-19 17:49 UTC (permalink / raw)
To: Paolo Bonzini, kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
Sean Christopherson, H. Peter Anvin, linux-kernel
On Thu, 2024-12-19 at 18:33 +0100, Paolo Bonzini wrote:
> On 9/10/24 22:03, Maxim Levitsky wrote:
> > Add 3 new tracepoints for nested VM exits which are intended
> > to capture extra information to gain insights about the nested guest
> > behavior.
> >
> > The new tracepoints are:
> >
> > - kvm_nested_msr
> > - kvm_nested_hypercall
> >
> > These tracepoints capture extra register state to be able to know
> > which MSR or which hypercall was done.
> >
> > - kvm_nested_page_fault
> >
> > This tracepoint allows capturing extra info about which host page fault
> > error code caused the nested page fault.
> >
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> > arch/x86/kvm/svm/nested.c | 22 +++++++++++
> > arch/x86/kvm/trace.h | 82 +++++++++++++++++++++++++++++++++++++--
> > arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
> > arch/x86/kvm/x86.c | 3 ++
> > 4 files changed, 131 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index 6f704c1037e51..2020307481553 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> > {
> > struct vcpu_svm *svm = to_svm(vcpu);
> > struct vmcb *vmcb = svm->vmcb;
> > + u64 host_error_code = vmcb->control.exit_info_1;
> > +
> >
> > if (vmcb->control.exit_code != SVM_EXIT_NPF) {
> > /*
> > @@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> > vmcb->control.exit_code_hi = 0;
> > vmcb->control.exit_info_1 = (1ULL << 32);
> > vmcb->control.exit_info_2 = fault->address;
> > + host_error_code = 0;
> > }
> >
> > vmcb->control.exit_info_1 &= ~0xffffffffULL;
> > vmcb->control.exit_info_1 |= fault->error_code;
> >
> > + trace_kvm_nested_page_fault(fault->address, host_error_code,
> > + fault->error_code);
> > +
>
> I disagree with Sean about trace_kvm_nested_page_fault. It's a useful
> addition and it is easier to understand what's happening with a
> dedicated tracepoint (especially on VMX).
>
> Tracepoints are not an exact science and they aren't entirely kernel API.
> At least they can just go away at any time (changing them is a lot
> more tricky, but their presence is not guaranteed). The one below has
> the slight ugliness of having to do some computation in
> nested_svm_vmexit(), this one should go in.
>
> > nested_svm_vmexit(svm);
> > }
> >
> > @@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> > vmcb12->control.exit_int_info_err,
> > KVM_ISA_SVM);
> >
> > + /* Collect some info about nested VM exits */
> > + switch (vmcb12->control.exit_code) {
> > + case SVM_EXIT_MSR:
> > + trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
> > + kvm_rcx_read(vcpu),
> > + (vmcb12->save.rax & 0xFFFFFFFFull) |
> > + (((u64)kvm_rdx_read(vcpu) << 32)));
> > + break;
> > + case SVM_EXIT_VMMCALL:
> > + trace_kvm_nested_hypercall(vmcb12->save.rax,
> > + kvm_rbx_read(vcpu),
> > + kvm_rcx_read(vcpu),
> > + kvm_rdx_read(vcpu));
> > + break;
>
> Here I probably would have preferred an unconditional tracepoint giving
> RAX/RBX/RCX/RDX after a nested vmexit. This is not exactly what Sean
> wanted but perhaps it strikes a middle ground? I know you wrote this
> for a debugging tool, do you really need to have everything in a single
> tracepoint, or can you correlate the existing exit tracepoint with this
> hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
Hi!
If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number argument, then
I can enable this new tracepoint twice with a different filter (vm_exit_num == msr and vm_exit_num == vmcall),
and each instance will count the events that I need.
So this can work.
Thanks!
Best regards,
Maxim Levitsky
>
> Paolo
>
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-12-19 17:49 ` Maxim Levitsky
@ 2024-12-19 18:02 ` Paolo Bonzini
2025-01-16 22:53 ` Sean Christopherson
0 siblings, 1 reply; 13+ messages in thread
From: Paolo Bonzini @ 2024-12-19 18:02 UTC (permalink / raw)
To: Maxim Levitsky, kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
Sean Christopherson, H. Peter Anvin, linux-kernel
On 12/19/24 18:49, Maxim Levitsky wrote:
>> Here I probably would have preferred an unconditional tracepoint giving
>> RAX/RBX/RCX/RDX after a nested vmexit. This is not exactly what Sean
>> wanted but perhaps it strikes a middle ground? I know you wrote this
>> for a debugging tool, do you really need to have everything in a single
>> tracepoint, or can you correlate the existing exit tracepoint with this
>> hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
>
> Hi!
>
> If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number argument, then
> I can enable this new tracepoint twice with a different filter (vm_exit_num == msr and vm_exit_num == vmcall),
> and each instance will count the events that I need.
>
> So this can work.
Ok, thanks. On one hand it may make sense to have trace_kvm_exit_regs
and trace_kvm_nested_exit_regs (you can even extend the
TRACE_EVENT_KVM_EXIT macro to generate both the exit and the exit_regs
tracepoint). On the other hand it seems to me that this new tracepoint
is kinda reinventing the wheel; your patch adding nested equivalents of
trace_kvm_hypercall and trace_kvm_msr seems more obvious to me.
I see Sean's point in not wanting one-off tracepoints, on the other hand
there is value in having similar tracepoints for the L1->L0 and L2->L0
cases. I'll let him choose between the two possibilities (a new
*_exit_regs pair, or just apply this patch) but I think there should be
one of these two.
Paolo
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
2024-12-19 18:02 ` Paolo Bonzini
@ 2025-01-16 22:53 ` Sean Christopherson
0 siblings, 0 replies; 13+ messages in thread
From: Sean Christopherson @ 2025-01-16 22:53 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Maxim Levitsky, kvm, x86, Dave Hansen, Thomas Gleixner,
Borislav Petkov, Ingo Molnar, H. Peter Anvin, linux-kernel
On Thu, Dec 19, 2024, Paolo Bonzini wrote:
> On 12/19/24 18:49, Maxim Levitsky wrote:
> > > Here I probably would have preferred an unconditional tracepoint giving
> > > RAX/RBX/RCX/RDX after a nested vmexit. This is not exactly what Sean
> > > wanted but perhaps it strikes a middle ground? I know you wrote this
> > > for a debugging tool, do you really need to have everything in a single
> > > tracepoint, or can you correlate the existing exit tracepoint with this
> > > hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
> >
> > Hi!
> >
> > If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number
> > argument, then I can enable this new tracepoint twice with a different
> > filter (vm_exit_num == msr and vm_exit_num == vmcall), and each
> > instance will count the events that I need.
> >
> > So this can work.
> Ok, thanks. On one hand it may make sense to have trace_kvm_exit_regs and
> trace_kvm_nested_exit_regs (you can even extend the TRACE_EVENT_KVM_EXIT
> macro to generate both the exit and the exit_regs tracepoint). On the other
> hand it seems to me that this new tracepoint is kinda reinventing the wheel;
> your patch adding nested equivalents of trace_kvm_hypercall and
> trace_kvm_msr seems more obvious to me.
>
> I see Sean's point in not wanting one-off tracepoints, on the other hand
> there is value in having similar tracepoints for the L1->L0 and L2->L0
> cases.
I don't understand why we want two (or three, or five) tracepoints for the same
thing. I want to go the opposite direction and (a) delete kvm_nested_vmexit
and then (b) rename kvm_nested_vmexit_inject => kvm_nested_vmexit so that it
pairs with kvm_nested_vmenter.
Similarly, having kvm_nested_intr_vmexit is asinine when kvm_nested_vmexit_inject
captures *more* information about the IRQ itself.
I don't see the point of trace_kvm_nested_exit_regs. Except for L1 vs. L2, it's
redundant. kvm_nested_vmexit_inject and kvm_nested_vmenter are useful because
they capture novel information.
> I'll let him choose between the two possibilities (a new *_exit_regs
> pair, or just apply this patch) but I think there should be one of these
> two.
Anything but a pair. Why can't we capture L1 vs. L2 in the tracepoints and call
it a day?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 0/3] KVM: x86: tracepoint updates
2024-09-10 20:03 [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
` (2 preceding siblings ...)
2024-09-10 20:03 ` [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints Maxim Levitsky
@ 2024-10-30 21:21 ` Maxim Levitsky
2024-11-22 1:04 ` Maxim Levitsky
2024-12-19 2:40 ` Sean Christopherson
4 siblings, 1 reply; 13+ messages in thread
From: Maxim Levitsky @ 2024-10-30 21:21 UTC (permalink / raw)
To: kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, Sean Christopherson, H. Peter Anvin, linux-kernel
On Tue, 2024-09-10 at 16:03 -0400, Maxim Levitsky wrote:
> This patch series is intended to add some selected information
> to the kvm tracepoints to make it easier to gather insights about
> running nested guests.
>
> This patch series was developed together with a new x86 performance analysis tool
> that I developed recently (https://gitlab.com/maximlevitsky/kvmon)
> which aims to be a better kvm_stat, and allows you at a glance
> to see what is happening in a VM, including nesting.
>
> V5: rebased on top of recent changes
>
> Best regards,
> Maxim Levitsky
>
> Maxim Levitsky (3):
> KVM: x86: add more information to the kvm_entry tracepoint
> KVM: x86: add information about pending requests to kvm_exit
> tracepoint
> KVM: x86: add new nested vmexit tracepoints
>
> arch/x86/include/asm/kvm-x86-ops.h | 1 +
> arch/x86/include/asm/kvm_host.h | 5 +-
> arch/x86/kvm/svm/nested.c | 22 ++++++
> arch/x86/kvm/svm/svm.c | 17 +++++
> arch/x86/kvm/trace.h | 107 ++++++++++++++++++++++++++---
> arch/x86/kvm/vmx/main.c | 1 +
> arch/x86/kvm/vmx/nested.c | 27 ++++++++
> arch/x86/kvm/vmx/vmx.c | 11 +++
> arch/x86/kvm/vmx/x86_ops.h | 4 ++
> arch/x86/kvm/x86.c | 3 +
> 10 files changed, 189 insertions(+), 9 deletions(-)
>
> --
> 2.26.3
>
>
Hi,
A very gentle ping on this patch series.
Best regards,
Maxim Levitsky
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v5 0/3] KVM: x86: tracepoint updates
2024-10-30 21:21 ` [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
@ 2024-11-22 1:04 ` Maxim Levitsky
0 siblings, 0 replies; 13+ messages in thread
From: Maxim Levitsky @ 2024-11-22 1:04 UTC (permalink / raw)
To: kvm
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, Sean Christopherson, H. Peter Anvin, linux-kernel
On Wed, 2024-10-30 at 17:21 -0400, Maxim Levitsky wrote:
> On Tue, 2024-09-10 at 16:03 -0400, Maxim Levitsky wrote:
> > This patch series is intended to add some selected information
> > to the kvm tracepoints to make it easier to gather insights about
> > running nested guests.
> >
> > This patch series was developed together with a new x86 performance analysis tool
> > that I developed recently (https://gitlab.com/maximlevitsky/kvmon)
> > which aims to be a better kvm_stat, and allows you at a glance
> > to see what is happening in a VM, including nesting.
> >
> > V5: rebased on top of recent changes
> >
> > Best regards,
> > Maxim Levitsky
> >
> > Maxim Levitsky (3):
> > KVM: x86: add more information to the kvm_entry tracepoint
> > KVM: x86: add information about pending requests to kvm_exit
> > tracepoint
> > KVM: x86: add new nested vmexit tracepoints
> >
> > arch/x86/include/asm/kvm-x86-ops.h | 1 +
> > arch/x86/include/asm/kvm_host.h | 5 +-
> > arch/x86/kvm/svm/nested.c | 22 ++++++
> > arch/x86/kvm/svm/svm.c | 17 +++++
> > arch/x86/kvm/trace.h | 107 ++++++++++++++++++++++++++---
> > arch/x86/kvm/vmx/main.c | 1 +
> > arch/x86/kvm/vmx/nested.c | 27 ++++++++
> > arch/x86/kvm/vmx/vmx.c | 11 +++
> > arch/x86/kvm/vmx/x86_ops.h | 4 ++
> > arch/x86/kvm/x86.c | 3 +
> > 10 files changed, 189 insertions(+), 9 deletions(-)
> >
> > --
> > 2.26.3
> >
> >
>
> Hi,
> A very gentle ping on this patch series.
>
> Best regards,
> Maxim Levitsky
>
Another kind ping on this patch series.
Best regards,
Maxim Levitsky
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 0/3] KVM: x86: tracepoint updates
2024-09-10 20:03 [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
` (3 preceding siblings ...)
2024-10-30 21:21 ` [PATCH v5 0/3] KVM: x86: tracepoint updates Maxim Levitsky
@ 2024-12-19 2:40 ` Sean Christopherson
4 siblings, 0 replies; 13+ messages in thread
From: Sean Christopherson @ 2024-12-19 2:40 UTC (permalink / raw)
To: Sean Christopherson, kvm, Maxim Levitsky
Cc: x86, Dave Hansen, Thomas Gleixner, Borislav Petkov, Paolo Bonzini,
Ingo Molnar, H. Peter Anvin, linux-kernel
On Tue, 10 Sep 2024 16:03:47 -0400, Maxim Levitsky wrote:
> This patch series is intended to add some selected information
> to the kvm tracepoints to make it easier to gather insights about
> running nested guests.
>
> This patch series was developed together with a new x86 performance analysis tool
> that I developed recently (https://gitlab.com/maximlevitsky/kvmon)
> which aims to be a better kvm_stat, and allows you at a glance
> to see what is happening in a VM, including nesting.
>
> [...]
Applied 1 and 2 to kvm-x86 misc, with the massaging to patch 1 I detailed earlier.
Again, please holler if you disagree with anything. Thanks!
[1/3] KVM: x86: add more information to the kvm_entry tracepoint
https://github.com/kvm-x86/linux/commit/3e633e7e7d07
[2/3] KVM: x86: add information about pending requests to kvm_exit tracepoint
https://github.com/kvm-x86/linux/commit/0e77b324110c
[3/3] KVM: x86: add new nested vmexit tracepoints
(no commit info)
--
https://github.com/kvm-x86/linux/tree/next
^ permalink raw reply [flat|nested] 13+ messages in thread