From: Riccardo Mancini <mancio@amazon.com>
To: <vkuznets@redhat.com>, <pbonzini@redhat.com>
Cc: <graf@amazon.de>, <kvm@vger.kernel.org>,
Riccardo Mancini <mancio@amazon.com>,
Eugene Batalov <bataloe@amazon.com>,
Greg Farrell <gffarre@amazon.com>
Subject: [RFC PATCH 4.14 v2] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest
Date: Wed, 18 Oct 2023 14:40:16 +0000 [thread overview]
Message-ID: <20231018144016.5635-1-mancio@amazon.com> (raw)
In-Reply-To: <87mswh7390.fsf@redhat.com>
Hey Vitaly, Paolo,
thank you both for the comments!
For visibility, here is the complete v2 patch with the additional safety
check to avoid handling page ready notifications from #PF if async-pf-int
is enabled.
Thanks,
Riccardo
Commit follows:
This patch backports support for interrupt-based delivery of Async Page
Fault notifications in KVM guests from commit b1d405751cd5 ("KVM: x86:
Switch KVM guest to using interrupts for page ready APF delivery") [1].
Differently from the patchet upstream, this patch does not remove
support for the legacy #PF-based delivery, and removes unnecessary
refactoring to limit changes to KVM guest code.
v2: add kvm_apf_int_enabled flag to prevent handling of PAGE_READY
[1] https://lore.kernel.org/kvm/20200525144125.143875-1-vkuznets@redhat.com/
Reviewed-by: Eugene Batalov <bataloe@amazon.com>
Reviewed-by: Greg Farrell <gffarre@amazon.com>
Signed-off-by: Riccardo Mancini <mancio@amazon.com>
---
arch/x86/entry/entry_32.S | 5 +++
arch/x86/entry/entry_64.S | 5 +++
arch/x86/include/asm/hardirq.h | 2 +-
arch/x86/include/asm/kvm_para.h | 7 ++++
arch/x86/include/uapi/asm/kvm_para.h | 16 +++++++++-
arch/x86/kernel/irq.c | 2 +-
arch/x86/kernel/kvm.c | 48 ++++++++++++++++++++++++----
7 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 1fdedb2eaef3..460eb7a9981e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -896,6 +896,11 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+BUILD_INTERRUPT3(kvm_async_pf_vector, HYPERVISOR_CALLBACK_VECTOR,
+ kvm_async_pf_intr)
+#endif
+
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1804ccf52d9b..545d911a2c9e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1113,6 +1113,11 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
+#ifdef CONFIG_KVM_GUEST
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ kvm_async_pf_vector kvm_async_pf_intr
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 486c843273c4..75e51b7c9f50 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
unsigned int irq_hv_callback_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44049b1..f4806dd3c38d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -94,6 +95,12 @@ void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
+extern __visible void kvm_async_pf_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_kvm_async_pf_vector kvm_async_pf_vector
+#endif
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 341db0462b85..29b86e9adc9a 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,8 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -42,6 +44,8 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
__u64 steal;
@@ -70,6 +74,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -101,8 +110,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
__u32 reason;
- __u8 pad[60];
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
__u32 enabled;
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index fbcc303fb1f9..d1def86b66bd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) || defined(CONFIG_KVM_GUEST)
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5853eb50138e..12e5fddb6da1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,6 +79,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
+static DEFINE_PER_CPU(u32, kvm_apf_int_enabled);
+
/*
* No need for any "IO delay" on KVM
*/
@@ -267,26 +269,48 @@ dotraplinkage void
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
+ u32 reason = kvm_read_and_reset_pf_reason();
+ u32 handle_page_ready = !__this_cpu_read(kvm_apf_int_enabled);
- switch (kvm_read_and_reset_pf_reason()) {
- default:
+ if (!reason) {
+ /* This is a normal page fault */
do_page_fault(regs, error_code);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ } else if (reason & KVM_PV_REASON_PAGE_NOT_PRESENT) {
/* page is swapped out by the host. */
prev_state = exception_enter();
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
- break;
- case KVM_PV_REASON_PAGE_READY:
+ } else if (handle_page_ready && (reason & KVM_PV_REASON_PAGE_READY)) {
+ /* possible only if interrupt-based mechanism is disabled */
rcu_irq_enter();
kvm_async_pf_task_wake((u32)read_cr2());
rcu_irq_exit();
- break;
+ } else {
+ WARN_ONCE(1, "Unexpected async PF flags: %x\n", reason);
}
}
NOKPROBE_SYMBOL(do_async_page_fault);
+__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
+{
+ u32 token;
+
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (__this_cpu_read(apf_reason.enabled)) {
+ token = __this_cpu_read(apf_reason.token);
+ rcu_irq_enter();
+ kvm_async_pf_task_wake(token);
+ rcu_irq_exit();
+ __this_cpu_write(apf_reason.token, 0);
+ wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ }
+
+ exiting_irq();
+}
+
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -344,6 +368,12 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT)) {
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_INT;
+ wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+ __this_cpu_write(kvm_apf_int_enabled, 1);
+ }
+
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
@@ -371,6 +401,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(kvm_apf_int_enabled, 0);
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
smp_processor_id());
@@ -490,6 +521,9 @@ void __init kvm_guest_init(void)
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf)
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
--
2.40.1
prev parent reply other threads:[~2023-10-18 14:41 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-17 13:44 [RFC PATCH 4.14] KVM: x86: Backport support for interrupt-based APF page-ready delivery in guest Mancini, Riccardo
2023-10-17 14:06 ` Vitaly Kuznetsov
2023-10-18 14:40 ` Riccardo Mancini [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231018144016.5635-1-mancio@amazon.com \
--to=mancio@amazon.com \
--cc=bataloe@amazon.com \
--cc=gffarre@amazon.com \
--cc=graf@amazon.de \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=vkuznets@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox