* [RFC PATCH 4.14 v2] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
2023-10-17 14:06 ` Vitaly Kuznetsov
@ 2023-10-18 14:40 ` Riccardo Mancini
0 siblings, 0 replies; 3+ messages in thread
From: Riccardo Mancini @ 2023-10-18 14:40 UTC (permalink / raw)
To: vkuznets, pbonzini
Cc: graf, kvm, Riccardo Mancini, Eugene Batalov, Greg Farrell
Hey Vitaly, Paolo,
thank you both for the comments!
For visibility, here is the complete v2 patch with the additional safety
check to avoid handling page ready notifications from #PF if async-pf-int
is enabled.
Thanks,
Riccardo
Commit follows:
This patch backports support for interrupt-based delivery of Async Page
Fault notifications in KVM guests from commit b1d405751cd5 ("KVM: x86:
Switch KVM guest to using interrupts for page ready APF delivery") [1].
Differently from the upstream patchset, this patch does not remove
support for the legacy #PF-based delivery, and omits unnecessary
refactoring to limit changes to KVM guest code.
v2: add kvm_apf_int_enabled flag to prevent handling of PAGE_READY
notifications in the #PF handler when interrupt-based delivery is enabled
[1] https://lore.kernel.org/kvm/20200525144125.143875-1-vkuznets@redhat.com/
Reviewed-by: Eugene Batalov <bataloe@amazon.com>
Reviewed-by: Greg Farrell <gffarre@amazon.com>
Signed-off-by: Riccardo Mancini <mancio@amazon.com>
---
arch/x86/entry/entry_32.S | 5 +++
arch/x86/entry/entry_64.S | 5 +++
arch/x86/include/asm/hardirq.h | 2 +-
arch/x86/include/asm/kvm_para.h | 7 ++++
arch/x86/include/uapi/asm/kvm_para.h | 16 +++++++++-
arch/x86/kernel/irq.c | 2 +-
arch/x86/kernel/kvm.c | 48 ++++++++++++++++++++++++----
7 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 1fdedb2eaef3..460eb7a9981e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -896,6 +896,11 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+BUILD_INTERRUPT3(kvm_async_pf_vector, HYPERVISOR_CALLBACK_VECTOR,
+ kvm_async_pf_intr)
+#endif
+
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1804ccf52d9b..545d911a2c9e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1113,6 +1113,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ kvm_async_pf_vector kvm_async_pf_intr
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 486c843273c4..75e51b7c9f50 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
unsigned int irq_hv_callback_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44049b1..f4806dd3c38d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -94,6 +95,12 @@ void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
+extern __visible void kvm_async_pf_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_kvm_async_pf_vector kvm_async_pf_vector
+#endif
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 341db0462b85..29b86e9adc9a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,8 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -42,6 +44,8 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
__u64 steal;
@@ -70,6 +74,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -101,8 +110,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
__u32 reason;
- __u8 pad[60];
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
__u32 enabled;
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index fbcc303fb1f9..d1def86b66bd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5853eb50138e..12e5fddb6da1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,6 +79,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
+static DEFINE_PER_CPU(u32, kvm_apf_int_enabled);
+
/*
* No need for any "IO delay" on KVM
*/
@@ -267,26 +269,48 @@ dotraplinkage void
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
+ u32 reason = kvm_read_and_reset_pf_reason();
+ u32 handle_page_ready = !__this_cpu_read(kvm_apf_int_enabled);
- switch (kvm_read_and_reset_pf_reason()) {
- default:
+ if (!reason) {
+ /* This is a normal page fault */
do_page_fault(regs, error_code);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ } else if (reason & KVM_PV_REASON_PAGE_NOT_PRESENT) {
/* page is swapped out by the host. */
prev_state = exception_enter();
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
- break;
- case KVM_PV_REASON_PAGE_READY:
+ } else if (handle_page_ready && (reason & KVM_PV_REASON_PAGE_READY)) {
+ /* possible only if interrupt-based mechanism is disabled */
rcu_irq_enter();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
- break;
+ } else {
+ WARN_ONCE(1, "Unexpected async PF flags: %x\n", reason);
}
}
NOKPROBE_SYMBOL(do_async_page_fault);
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
+{
+ u32 token;
+
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (__this_cpu_read(apf_reason.enabled)) {
+ token = __this_cpu_read(apf_reason.token);
+ rcu_irq_enter();
+ kvm_async_pf_task_wake(token);
+ rcu_irq_exit();
+ __this_cpu_write(apf_reason.token, 0);
+ wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ }
+
+ exiting_irq();
+}
+
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -344,6 +368,12 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
+ wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ __this_cpu_write(kvm_apf_int_enabled, 1);
+ }
+
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
@@ -371,6 +401,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(kvm_apf_int_enabled, 0);
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
smp_processor_id());
@@ -490,6 +521,9 @@ void __init kvm_guest_init(void)
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf)
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
--
2.40.1
^ permalink raw reply related [flat|nested] 3+ messages in thread