* [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-05 15:38 Bug? Incompatible APF for 4.14 guest on 5.10 and later host Vitaly Kuznetsov
@ 2023-10-13 16:36 ` Riccardo Mancini
2023-10-16 14:18 ` Vitaly Kuznetsov
0 siblings, 1 reply; 7+ messages in thread
From: Riccardo Mancini @ 2023-10-13 16:36 UTC (permalink / raw)
To: vkuznets; +Cc: bataloe, graf, kvm, pbonzini, Riccardo Mancini, Greg Farrell
Hey Vitaly,
thanks for your suggestion!
I've backported the guest-side of the patchset to 4.14.326, could you help
us and take a look at the backport?
I only backported the original patchset, I'm not sure if there's any
other patch (bug fix) that needs to be included in the backport.
We've tested it in our environment and it gets rid of the performance
regression when running on 5.10 host, with no detected functional issue,
either on 4.14 or on 5.10.
I don't think the 4.14 stable tree accepts any patch that adds a feature
like this one, but we're looking into downstreaming it.
This patch might also be useful to somebody else hitting our same issue.
Thanks,
Riccardo
Commit follows:
This patch backports support for interrupt-based delivery of Async Page
Fault notifications in KVM guests from commit b1d405751cd5 ("KVM: x86:
Switch KVM guest to using interrupts for page ready APF delivery") [1].
Differently from the patchset upstream, this patch does not remove
support for the legacy #PF-based delivery, and removes unnecessary
refactoring to limit changes to KVM guest code.
[1] https://lore.kernel.org/kvm/20200525144125.143875-1-vkuznets@redhat.com/
Reviewed-by: Eugene Batalov <bataloe@amazon.com>
Reviewed-by: Greg Farrell <gffarre@amazon.com>
Signed-off-by: Riccardo Mancini <mancio@amazon.com>
---
arch/x86/entry/entry_32.S | 5 ++++
arch/x86/entry/entry_64.S | 5 ++++
arch/x86/include/asm/hardirq.h | 2 +-
arch/x86/include/asm/kvm_para.h | 7 +++++
arch/x86/include/uapi/asm/kvm_para.h | 16 ++++++++++-
arch/x86/kernel/irq.c | 2 +-
arch/x86/kernel/kvm.c | 42 +++++++++++++++++++++++-----
7 files changed, 69 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 1fdedb2eaef3..460eb7a9981e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -896,6 +896,11 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+BUILD_INTERRUPT3(kvm_async_pf_vector, HYPERVISOR_CALLBACK_VECTOR,
+ kvm_async_pf_intr)
+#endif
+
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1804ccf52d9b..545d911a2c9e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1113,6 +1113,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ kvm_async_pf_vector kvm_async_pf_intr
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 486c843273c4..75e51b7c9f50 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
unsigned int irq_hv_callback_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44049b1..f4806dd3c38d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -94,6 +95,12 @@ void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
+extern __visible void kvm_async_pf_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_kvm_async_pf_vector kvm_async_pf_vector
+#endif
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 341db0462b85..29b86e9adc9a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,8 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -42,6 +44,8 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
__u64 steal;
@@ -70,6 +74,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -101,8 +110,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
__u32 reason;
- __u8 pad[60];
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
__u32 enabled;
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index fbcc303fb1f9..d1def86b66bd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5853eb50138e..50390628342d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -267,26 +267,46 @@ dotraplinkage void
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
+ u32 reason = kvm_read_and_reset_pf_reason();
- switch (kvm_read_and_reset_pf_reason()) {
- default:
+ if (!reason) {
+ /* This is a normal page fault */
do_page_fault(regs, error_code);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ } else if (reason & KVM_PV_REASON_PAGE_NOT_PRESENT) {
/* page is swapped out by the host. */
prev_state = exception_enter();
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
- break;
- case KVM_PV_REASON_PAGE_READY:
+ } else if (reason & KVM_PV_REASON_PAGE_READY) {
rcu_irq_enter();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
- break;
+ } else {
+ WARN_ONCE(1, "Unexpected async PF flags: %x\n", reason);
}
}
NOKPROBE_SYMBOL(do_async_page_fault);
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
+{
+ u32 token;
+
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (__this_cpu_read(apf_reason.enabled)) {
+ token = __this_cpu_read(apf_reason.token);
+ rcu_irq_enter();
+ kvm_async_pf_task_wake(token);
+ rcu_irq_exit();
+ __this_cpu_write(apf_reason.token, 0);
+ wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ }
+
+ exiting_irq();
+}
+
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -344,6 +364,11 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
+ wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ }
+
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
@@ -490,6 +515,9 @@ void __init kvm_guest_init(void)
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf)
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-13 16:36 ` [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest Riccardo Mancini
@ 2023-10-16 14:18 ` Vitaly Kuznetsov
2023-10-16 21:57 ` Paolo Bonzini
0 siblings, 1 reply; 7+ messages in thread
From: Vitaly Kuznetsov @ 2023-10-16 14:18 UTC (permalink / raw)
To: Riccardo Mancini
Cc: bataloe, graf, kvm, pbonzini, Riccardo Mancini, Greg Farrell
Riccardo Mancini <mancio@amazon.com> writes:
> Hey Vitaly,
>
> thanks for your suggestion!
> I've backported the guest-side of the patchset to 4.14.326, could you help
> us and take a look at the backport?
> I only backported the original patchset, I'm not sure if there's any
> other patch (bug fix) that needs to be included in the backport.
I remember us fixing PV feature enablement/disablement for
hibernation/kdump later, see e.g.
commit 8b79feffeca28c5459458fe78676b081e87c93a4
Author: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed Apr 14 14:35:41 2021 +0200
x86/kvm: Teardown PV features on boot CPU as well
commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce
Author: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed Apr 14 14:35:43 2021 +0200
x86/kvm: Disable all PV features on crash
if you're interested in such use-cases. I don't recall any required
fixes for normal operation.
> We've tested it in our environment and it gets rid of the performance
> regression when running on 5.10 host, with no detected functional issue,
> either on 4.14 or on 5.10.
> I don't think the 4.14 stable tree accepts any patch that adds a feature
> like this one, but we're looking into downstreaming it.
> This patch might also be useful to somebody else hitting our same issue.
>
> Thanks,
> Riccardo
>
> Commit follows:
>
> This patch backports support for interrupt-based delivery of Async Page
> Fault notifications in KVM guests from commit b1d405751cd5 ("KVM: x86:
> Switch KVM guest to using interrupts for page ready APF delivery") [1].
>
> Differently from the patchset upstream, this patch does not remove
> support for the legacy #PF-based delivery, and removes unnecessary
> refactoring to limit changes to KVM guest code.
Keeping the legacy mechanism can be problematic when hypervisor is using
interrupt-based delivery: #PF handler in the guest for genuine #PFs will
still be trying to call do_async_page_fault() and, in case there's
something in the shared memory (kvm_read_and_reset_pf_reason()) it will
actually try to process it. I'm not 100% sure this is safe and the guest
can't try consuming something half-baked. At the bare minimum, this will
result in a spurious interrupt from guest's PoV (likely safe but
undesired).
In case keeping legacy mechanism is a must, I would suggest you somehow
record the fact that the guest has opted for interrupt-based delivery
(e.g. set a global variable or use a static key) and short-circuit
do_async_page_fault() to immediately return and not do anything in this
case.
The rest of the backport looks good to me but I've only eyeballed it and
I may not remember some (any?) 4.14 details...
>
> [1] https://lore.kernel.org/kvm/20200525144125.143875-1-vkuznets@redhat.com/
>
> Reviewed-by: Eugene Batalov <bataloe@amazon.com>
> Reviewed-by: Greg Farrell <gffarre@amazon.com>
> Signed-off-by: Riccardo Mancini <mancio@amazon.com>
>
> ---
> arch/x86/entry/entry_32.S | 5 ++++
> arch/x86/entry/entry_64.S | 5 ++++
> arch/x86/include/asm/hardirq.h | 2 +-
> arch/x86/include/asm/kvm_para.h | 7 +++++
> arch/x86/include/uapi/asm/kvm_para.h | 16 ++++++++++-
> arch/x86/kernel/irq.c | 2 +-
> arch/x86/kernel/kvm.c | 42 +++++++++++++++++++++++-----
> 7 files changed, 69 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 1fdedb2eaef3..460eb7a9981e 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -896,6 +896,11 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
>
> #endif /* CONFIG_HYPERV */
>
> +#ifdef CONFIG_KVM_GUEST
> +BUILD_INTERRUPT3(kvm_async_pf_vector, HYPERVISOR_CALLBACK_VECTOR,
> + kvm_async_pf_intr)
> +#endif
> +
> ENTRY(page_fault)
> ASM_CLAC
> pushl $do_page_fault
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 1804ccf52d9b..545d911a2c9e 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -1113,6 +1113,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> hyperv_callback_vector hyperv_vector_handler
> #endif /* CONFIG_HYPERV */
>
> +#ifdef CONFIG_KVM_GUEST
> +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> + kvm_async_pf_vector kvm_async_pf_intr
> +#endif
> +
> idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
> idtentry int3 do_int3 has_error_code=0 create_gap=1
> idtentry stack_segment do_stack_segment has_error_code=1
> diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
> index 486c843273c4..75e51b7c9f50 100644
> --- a/arch/x86/include/asm/hardirq.h
> +++ b/arch/x86/include/asm/hardirq.h
> @@ -37,7 +37,7 @@ typedef struct {
> #ifdef CONFIG_X86_MCE_AMD
> unsigned int irq_deferred_error_count;
> #endif
> -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
> +#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
> unsigned int irq_hv_callback_count;
> #endif
> } ____cacheline_aligned irq_cpustat_t;
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index c373e44049b1..f4806dd3c38d 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -4,6 +4,7 @@
>
> #include <asm/processor.h>
> #include <asm/alternative.h>
> +#include <linux/interrupt.h>
> #include <uapi/asm/kvm_para.h>
>
> extern void kvmclock_init(void);
> @@ -94,6 +95,12 @@ void kvm_async_pf_task_wake(u32 token);
> u32 kvm_read_and_reset_pf_reason(void);
> extern void kvm_disable_steal_time(void);
>
> +extern __visible void kvm_async_pf_vector(void);
> +#ifdef CONFIG_TRACING
> +#define trace_kvm_async_pf_vector kvm_async_pf_vector
> +#endif
> +__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs);
> +
> #ifdef CONFIG_PARAVIRT_SPINLOCKS
> void __init kvm_spinlock_init(void);
> #else /* !CONFIG_PARAVIRT_SPINLOCKS */
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index 341db0462b85..29b86e9adc9a 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -26,6 +26,8 @@
> #define KVM_FEATURE_PV_EOI 6
> #define KVM_FEATURE_PV_UNHALT 7
> #define KVM_FEATURE_ASYNC_PF_VMEXIT 10
> +#define KVM_FEATURE_ASYNC_PF_INT 14
> +#define KVM_FEATURE_MSI_EXT_DEST_ID 15
>
> /* The last 8 bits are used to indicate how to interpret the flags field
> * in pvclock structure. If no bits are set, all flags are ignored.
> @@ -42,6 +44,8 @@
> #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> #define MSR_KVM_STEAL_TIME 0x4b564d03
> #define MSR_KVM_PV_EOI_EN 0x4b564d04
> +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
> +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
>
> struct kvm_steal_time {
> __u64 steal;
> @@ -70,6 +74,11 @@ struct kvm_clock_pairing {
> #define KVM_ASYNC_PF_ENABLED (1 << 0)
> #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
> #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
> +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
> +
> +/* MSR_KVM_ASYNC_PF_INT */
> +#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
> +
>
> /* Operations for KVM_HC_MMU_OP */
> #define KVM_MMU_OP_WRITE_PTE 1
> @@ -101,8 +110,13 @@ struct kvm_mmu_op_release_pt {
> #define KVM_PV_REASON_PAGE_READY 2
>
> struct kvm_vcpu_pv_apf_data {
> + /* Used for 'page not present' events delivered via #PF */
> __u32 reason;
> - __u8 pad[60];
> +
> + /* Used for 'page ready' events delivered via interrupt notification */
> + __u32 token;
> +
> + __u8 pad[56];
> __u32 enabled;
> };
>
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index fbcc303fb1f9..d1def86b66bd 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
> seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
> seq_puts(p, " Machine check polls\n");
> #endif
> -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
> +#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
> if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
> seq_printf(p, "%*s: ", prec, "HYP");
> for_each_online_cpu(j)
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5853eb50138e..50390628342d 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -267,26 +267,46 @@ dotraplinkage void
> do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
> {
> enum ctx_state prev_state;
> + u32 reason = kvm_read_and_reset_pf_reason();
>
> - switch (kvm_read_and_reset_pf_reason()) {
> - default:
> + if (!reason) {
> + /* This is a normal page fault */
> do_page_fault(regs, error_code);
> - break;
> - case KVM_PV_REASON_PAGE_NOT_PRESENT:
> + } else if (reason & KVM_PV_REASON_PAGE_NOT_PRESENT) {
> /* page is swapped out by the host. */
> prev_state = exception_enter();
> kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
> exception_exit(prev_state);
> - break;
> - case KVM_PV_REASON_PAGE_READY:
> + } else if (reason & KVM_PV_REASON_PAGE_READY) {
> rcu_irq_enter();
> kvm_async_pf_task_wake((u32)read_cr2());
> rcu_irq_exit();
> - break;
> + } else {
> + WARN_ONCE(1, "Unexpected async PF flags: %x\n", reason);
> }
> }
> NOKPROBE_SYMBOL(do_async_page_fault);
>
> +__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
> +{
> + u32 token;
> +
> + entering_ack_irq();
> +
> + inc_irq_stat(irq_hv_callback_count);
> +
> + if (__this_cpu_read(apf_reason.enabled)) {
> + token = __this_cpu_read(apf_reason.token);
> + rcu_irq_enter();
> + kvm_async_pf_task_wake(token);
> + rcu_irq_exit();
> + __this_cpu_write(apf_reason.token, 0);
> + wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
> + }
> +
> + exiting_irq();
> +}
> +
> static void __init paravirt_ops_setup(void)
> {
> pv_info.name = "KVM";
> @@ -344,6 +364,11 @@ static void kvm_guest_cpu_init(void)
> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
> pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
>
> + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
> + pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
> + wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
> + }
> +
> wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
> __this_cpu_write(apf_reason.enabled, 1);
> printk(KERN_INFO"KVM setup async PF for cpu %d\n",
> @@ -490,6 +515,9 @@ void __init kvm_guest_init(void)
> if (kvmclock_vsyscall)
> kvm_setup_vsyscall_timeinfo();
>
> + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf)
> + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
> +
> #ifdef CONFIG_SMP
> smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
> if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
--
Vitaly
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-16 14:18 ` Vitaly Kuznetsov
@ 2023-10-16 21:57 ` Paolo Bonzini
2023-10-17 11:22 ` Vitaly Kuznetsov
0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2023-10-16 21:57 UTC (permalink / raw)
To: Vitaly Kuznetsov, Riccardo Mancini; +Cc: bataloe, graf, kvm, Greg Farrell
On 10/16/23 16:18, Vitaly Kuznetsov wrote:
> In case keeping legacy mechanism is a must, I would suggest you somehow
> record the fact that the guest has opted for interrupt-based delivery
> (e.g. set a global variable or use a static key) and short-circuit
> do_async_page_fault() to immediately return and not do anything in this
> case.
I guess you mean "not do anything for KVM_PV_REASON_PAGE_READY in this
case"?
Paolo
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-16 21:57 ` Paolo Bonzini
@ 2023-10-17 11:22 ` Vitaly Kuznetsov
0 siblings, 0 replies; 7+ messages in thread
From: Vitaly Kuznetsov @ 2023-10-17 11:22 UTC (permalink / raw)
To: Paolo Bonzini, Riccardo Mancini; +Cc: bataloe, graf, kvm, Greg Farrell
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 10/16/23 16:18, Vitaly Kuznetsov wrote:
>> In case keeping legacy mechanism is a must, I would suggest you somehow
>> record the fact that the guest has opted for interrupt-based delivery
>> (e.g. set a global variable or use a static key) and short-circuit
>> do_async_page_fault() to immediately return and not do anything in this
>> case.
>
> I guess you mean "not do anything for KVM_PV_REASON_PAGE_READY in this
> case"?
Yes, of course: KVM_PV_REASON_PAGE_NOT_PRESENT is always a #PF.
--
Vitaly
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
@ 2023-10-17 13:44 Mancini, Riccardo
2023-10-17 14:06 ` Vitaly Kuznetsov
0 siblings, 1 reply; 7+ messages in thread
From: Mancini, Riccardo @ 2023-10-17 13:44 UTC (permalink / raw)
To: Vitaly Kuznetsov, Paolo Bonzini
Cc: Batalov, Eugene, Graf (AWS), Alexander, kvm@vger.kernel.org,
Farrell, Greg
Hey,
Thank you both for the quick feedback.
> > I've backported the guest-side of the patchset to 4.14.326, could you
> > help us and take a look at the backport?
> > I only backported the original patchset, I'm not sure if there's any
> > other patch (bug fix) that needs to be included in the backport.
>
> I remember us fixing PV feature enablement/disablement for hibernation/kdump later, see e.g.
>
> commit 8b79feffeca28c5459458fe78676b081e87c93a4
> Author: Vitaly Kuznetsov <vkuznets@redhat.com>
> Date: Wed Apr 14 14:35:41 2021 +0200
>
> x86/kvm: Teardown PV features on boot CPU as well
>
> commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce
> Author: Vitaly Kuznetsov <vkuznets@redhat.com>
> Date: Wed Apr 14 14:35:43 2021 +0200
>
> x86/kvm: Disable all PV features on crash
>
> if you're interested in such use-cases. I don't recall any required fixes for normal operation.
These look like issues already present in 4.14, not introduced by the
interrupt-based mechanism, correct?
If so, I wouldn't chase them.
Furthermore, I don't even think we hit those use cases in our scenario.
>
> Paolo Bonzini <pbonzini@redhat.com> writes:
>
> > On 10/16/23 16:18, Vitaly Kuznetsov wrote:
> >> In case keeping legacy mechanism is a must, I would suggest you
> >> somehow record the fact that the guest has opted for interrupt-based
> >> delivery (e.g. set a global variable or use a static key) and
> >> short-circuit
> >> do_async_page_fault() to immediately return and not do anything in
> >> this case.
> >
> > I guess you mean "not do anything for KVM_PV_REASON_PAGE_READY in this
> > case"?
>
> Yes, of course: KVM_PV_REASON_PAGE_NOT_PRESENT is always a #PF.
I agree this is a difference with the upstream asyncpf-int implementation and
it's theoretically incorrect. I think this shouldn't happen in a normal case,
but it's better to keep it consistent.
I'll add a check that asyncpf-int is _not_ enabled before processing
KVM_PV_REASON_PAGE_READY. Draft diff below.
Thanks,
Riccardo
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 582a366b82d8..bdfdffd35939 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,6 +79,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
+static DEFINE_PER_CPU(u32, kvm_apf_int_enabled);
+
/*
* No need for any "IO delay" on KVM
*/
@@ -277,7 +279,8 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
prev_state = exception_enter();
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
- } else if (reason & KVM_PV_REASON_PAGE_READY) {
+ } else if (!__this_cpu_read(kvm_apf_int_enabled) && (reason & KVM_PV_REASON_PAGE_READY)) {
+ /* this event is only possible if interrupt-based mechanism is disabled */
rcu_irq_enter();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
@@ -367,6 +370,7 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ __this_cpu_write(kvm_apf_int_enabled, 1);
}
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
@@ -396,6 +400,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(kvm_apf_int_enabled, 0);
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
smp_processor_id());
^ permalink raw reply related [flat|nested] 7+ messages in thread
* RE: [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-17 13:44 [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest Mancini, Riccardo
@ 2023-10-17 14:06 ` Vitaly Kuznetsov
2023-10-18 14:40 ` [RFC PATCH 4.14 v2] " Riccardo Mancini
0 siblings, 1 reply; 7+ messages in thread
From: Vitaly Kuznetsov @ 2023-10-17 14:06 UTC (permalink / raw)
To: Mancini, Riccardo, Paolo Bonzini
Cc: Batalov, Eugene, Graf (AWS), Alexander, kvm@vger.kernel.org,
Farrell, Greg
"Mancini, Riccardo" <mancio@amazon.com> writes:
> Hey,
>
> Thank you both for the quick feedback.
>
>> > I've backported the guest-side of the patchset to 4.14.326, could you
>> > help us and take a look at the backport?
>> > I only backported the original patchset, I'm not sure if there's any
>> > other patch (bug fix) that needs to be included in the backport.
>>
>> I remember us fixing PV feature enablement/disablement for hibernation/kdump later, see e.g.
>>
>> commit 8b79feffeca28c5459458fe78676b081e87c93a4
>> Author: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Date: Wed Apr 14 14:35:41 2021 +0200
>>
>> x86/kvm: Teardown PV features on boot CPU as well
>>
>> commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce
>> Author: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Date: Wed Apr 14 14:35:43 2021 +0200
>>
>> x86/kvm: Disable all PV features on crash
>>
>> if you're interested in such use-cases. I don't recall any required fixes for normal operation.
>
> These look like issues already present in 4.14, not introduced by the
> interrupt-based mechanism, correct?
> If so, I wouldn't chase them.
> Furthermore, I don't even think we hit those use cases in our scenario.
>
>>
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>>
>> > On 10/16/23 16:18, Vitaly Kuznetsov wrote:
>> >> In case keeping legacy mechanism is a must, I would suggest you
>> >> somehow record the fact that the guest has opted for interrupt-based
>> >> delivery (e.g. set a global variable or use a static key) and
>> >> short-circuit
>> >> do_async_page_fault() to immediately return and not do anything in
>> >> this case.
>> >
>> > I guess you mean "not do anything for KVM_PV_REASON_PAGE_READY in this
>> > case"?
>>
>> Yes, of course: KVM_PV_REASON_PAGE_NOT_PRESENT is always a #PF.
>
> I agree this is a difference with the upstream asyncpf-int implementation and
> it's theoretically incorrect. I think this shouldn't happen in a normal case,
> but it's better to keep it consistent.
> I'll add a check that asyncpf-int is _not_ enabled before processing
> KVM_PV_REASON_PAGE_READY. Draft diff below.
>
> Thanks,
> Riccardo
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 582a366b82d8..bdfdffd35939 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -79,6 +79,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
> static int has_steal_clock = 0;
>
> +static DEFINE_PER_CPU(u32, kvm_apf_int_enabled);
> +
> /*
> * No need for any "IO delay" on KVM
> */
> @@ -277,7 +279,8 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
> prev_state = exception_enter();
> kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
> exception_exit(prev_state);
> - } else if (reason & KVM_PV_REASON_PAGE_READY) {
> + } else if (!__this_cpu_read(kvm_apf_int_enabled) && (reason & KVM_PV_REASON_PAGE_READY)) {
My bad: I completely forgot KVM_PV_REASON_PAGE_READY is actually not
used for interrupt-based delivery:
commit 9ce372b33a2ebbd0b965148879ae169a0015d3f3
Author: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu May 7 16:36:02 2020 +0200
KVM: x86: drop KVM_PV_REASON_PAGE_READY case from kvm_handle_page_fault()
and in fact there's nothing else but KVM_PV_REASON_PAGE_NOT_PRESENT in
apf_reason.flags so I agree that this should never happen (but
'kvm_apf_int_enabled' is a good hardening anyway :-)
> + /* this event is only possible if interrupt-based mechanism is disabled */
> rcu_irq_enter();
> kvm_async_pf_task_wake((u32)read_cr2());
> rcu_irq_exit();
> @@ -367,6 +370,7 @@ static void kvm_guest_cpu_init(void)
> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
> pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
> wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
> + __this_cpu_write(kvm_apf_int_enabled, 1);
> }
>
> wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
> @@ -396,6 +400,7 @@ static void kvm_pv_disable_apf(void)
>
> wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
> __this_cpu_write(apf_reason.enabled, 0);
> + __this_cpu_write(kvm_apf_int_enabled, 0);
>
> printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
> smp_processor_id());
>
--
Vitaly
^ permalink raw reply [flat|nested] 7+ messages in thread
* [RFC PATCH 4.14 v2] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-17 14:06 ` Vitaly Kuznetsov
@ 2023-10-18 14:40 ` Riccardo Mancini
0 siblings, 0 replies; 7+ messages in thread
From: Riccardo Mancini @ 2023-10-18 14:40 UTC (permalink / raw)
To: vkuznets, pbonzini
Cc: graf, kvm, Riccardo Mancini, Eugene Batalov, Greg Farrell
Hey Vitaly, Paolo,
thank you both for the comments!
For visibility, here is the complete v2 patch with the additional safety
check to avoid handling page ready notifications from #PF if async-pf-int
is enabled.
Thanks,
Riccardo
Commit follows:
This patch backports support for interrupt-based delivery of Async Page
Fault notifications in KVM guests from commit b1d405751cd5 ("KVM: x86:
Switch KVM guest to using interrupts for page ready APF delivery") [1].
Differently from the patchset upstream, this patch does not remove
support for the legacy #PF-based delivery, and removes unnecessary
refactoring to limit changes to KVM guest code.
v2: add kvm_apf_int_enabled flag to prevent handling of PAGE_READY
[1] https://lore.kernel.org/kvm/20200525144125.143875-1-vkuznets@redhat.com/
Reviewed-by: Eugene Batalov <bataloe@amazon.com>
Reviewed-by: Greg Farrell <gffarre@amazon.com>
Signed-off-by: Riccardo Mancini <mancio@amazon.com>
---
arch/x86/entry/entry_32.S | 5 +++
arch/x86/entry/entry_64.S | 5 +++
arch/x86/include/asm/hardirq.h | 2 +-
arch/x86/include/asm/kvm_para.h | 7 ++++
arch/x86/include/uapi/asm/kvm_para.h | 16 +++++++++-
arch/x86/kernel/irq.c | 2 +-
arch/x86/kernel/kvm.c | 48 ++++++++++++++++++++++++----
7 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 1fdedb2eaef3..460eb7a9981e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -896,6 +896,11 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+BUILD_INTERRUPT3(kvm_async_pf_vector, HYPERVISOR_CALLBACK_VECTOR,
+ kvm_async_pf_intr)
+#endif
+
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1804ccf52d9b..545d911a2c9e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1113,6 +1113,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ kvm_async_pf_vector kvm_async_pf_intr
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 486c843273c4..75e51b7c9f50 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
unsigned int irq_hv_callback_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44049b1..f4806dd3c38d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -94,6 +95,12 @@ void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
+extern __visible void kvm_async_pf_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_kvm_async_pf_vector kvm_async_pf_vector
+#endif
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 341db0462b85..29b86e9adc9a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,8 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -42,6 +44,8 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
__u64 steal;
@@ -70,6 +74,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -101,8 +110,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
__u32 reason;
- __u8 pad[60];
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
__u32 enabled;
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index fbcc303fb1f9..d1def86b66bd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5853eb50138e..12e5fddb6da1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,6 +79,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
+static DEFINE_PER_CPU(u32, kvm_apf_int_enabled);
+
/*
* No need for any "IO delay" on KVM
*/
@@ -267,26 +269,48 @@ dotraplinkage void
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
+ u32 reason = kvm_read_and_reset_pf_reason();
+ u32 handle_page_ready = !__this_cpu_read(kvm_apf_int_enabled);
- switch (kvm_read_and_reset_pf_reason()) {
- default:
+ if (!reason) {
+ /* This is a normal page fault */
do_page_fault(regs, error_code);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ } else if (reason & KVM_PV_REASON_PAGE_NOT_PRESENT) {
/* page is swapped out by the host. */
prev_state = exception_enter();
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
- break;
- case KVM_PV_REASON_PAGE_READY:
+ } else if (handle_page_ready && (reason & KVM_PV_REASON_PAGE_READY)) {
+ /* possible only if interrupt-based mechanism is disabled */
rcu_irq_enter();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
- break;
+ } else {
+ WARN_ONCE(1, "Unexpected async PF flags: %x\n", reason);
}
}
NOKPROBE_SYMBOL(do_async_page_fault);
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
+{
+ u32 token;
+
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (__this_cpu_read(apf_reason.enabled)) {
+ token = __this_cpu_read(apf_reason.token);
+ rcu_irq_enter();
+ kvm_async_pf_task_wake(token);
+ rcu_irq_exit();
+ __this_cpu_write(apf_reason.token, 0);
+ wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ }
+
+ exiting_irq();
+}
+
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -344,6 +368,12 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
+ wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ __this_cpu_write(kvm_apf_int_enabled, 1);
+ }
+
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
@@ -371,6 +401,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(kvm_apf_int_enabled, 0);
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
smp_processor_id());
@@ -490,6 +521,9 @@ void __init kvm_guest_init(void)
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf)
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-10-18 14:41 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-17 13:44 [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest Mancini, Riccardo
2023-10-17 14:06 ` Vitaly Kuznetsov
2023-10-18 14:40 ` [RFC PATCH 4.14 v2] " Riccardo Mancini
-- strict thread matches above, loose matches on Subject: below --
2023-10-05 15:38 Bug? Incompatible APF for 4.14 guest on 5.10 and later host Vitaly Kuznetsov
2023-10-13 16:36 ` [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest Riccardo Mancini
2023-10-16 14:18 ` Vitaly Kuznetsov
2023-10-16 21:57 ` Paolo Bonzini
2023-10-17 11:22 ` Vitaly Kuznetsov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox