From: Sean Christopherson <seanjc@google.com>
To: Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Joerg Roedel <joro@8bytes.org>,
David Woodhouse <dwmw2@infradead.org>,
Lu Baolu <baolu.lu@linux.intel.com>
Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
kvm@vger.kernel.org, iommu@lists.linux.dev,
linux-kernel@vger.kernel.org, Sairaj Kodilkar <sarunkod@amd.com>,
Vasant Hegde <vasant.hegde@amd.com>,
Maxim Levitsky <mlevitsk@redhat.com>,
Joao Martins <joao.m.martins@oracle.com>,
Francesco Lavra <francescolavra.fl@gmail.com>,
David Matlack <dmatlack@google.com>
Subject: [PATCH v3 53/62] KVM: x86: Decouple device assignment from IRQ bypass
Date: Wed, 11 Jun 2025 15:45:56 -0700 [thread overview]
Message-ID: <20250611224604.313496-55-seanjc@google.com> (raw)
In-Reply-To: <20250611224604.313496-2-seanjc@google.com>
Use a dedicated counter to track the number of IRQs that can utilize IRQ
bypass instead of piggybacking the assigned device count. As evidenced by
commit 2edd9cb79fb3 ("kvm: detect assigned device via irqbypass manager"),
it's possible for a device to be able to post IRQs to a vCPU without said
device being assigned to a VM.
Leave the calls to kvm_arch_{start,end}_assignment() alone for the moment
to avoid regressing the MMIO stale data mitigation. KVM is abusing the
assigned device count when applying mmio_stale_data_clear, and it's not at
all clear if vDPA devices rely on this behavior. This will hopefully be
cleaned up in the future, as the number of assigned devices is a terrible
heuristic for detecting if a VM has access to host MMIO.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 +-
arch/x86/include/asm/kvm_host.h | 4 +++-
arch/x86/kvm/irq.c | 9 ++++++++-
arch/x86/kvm/vmx/main.c | 2 +-
arch/x86/kvm/vmx/posted_intr.c | 16 ++++++++++------
arch/x86/kvm/vmx/posted_intr.h | 2 +-
arch/x86/kvm/x86.c | 3 +--
7 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8d50e3e0a19b..8897f509860c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -112,7 +112,7 @@ KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
KVM_X86_OP_OPTIONAL(pi_update_irte)
-KVM_X86_OP_OPTIONAL(pi_start_assignment)
+KVM_X86_OP_OPTIONAL(pi_start_bypass)
KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c722adfedd96..01edcefbd937 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1382,6 +1382,8 @@ struct kvm_arch {
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
atomic_t assigned_device_count;
+ unsigned long nr_possible_bypass_irqs;
+
#ifdef CONFIG_KVM_IOAPIC
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
@@ -1855,7 +1857,7 @@ struct kvm_x86_ops {
int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
struct kvm_vcpu *vcpu, u32 vector);
- void (*pi_start_assignment)(struct kvm *kvm);
+ void (*pi_start_bypass)(struct kvm *kvm);
void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 02efa7162252..3e4338c6a712 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -570,10 +570,15 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
spin_lock_irq(&kvm->irqfds.lock);
irqfd->producer = prod;
+ if (!kvm->arch.nr_possible_bypass_irqs++)
+ kvm_x86_call(pi_start_bypass)(kvm);
+
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry);
- if (ret)
+ if (ret) {
+ kvm->arch.nr_possible_bypass_irqs--;
kvm_arch_end_assignment(irqfd->kvm);
+ }
}
spin_unlock_irq(&kvm->irqfds.lock);
@@ -606,6 +611,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
}
irqfd->producer = NULL;
+ kvm->arch.nr_possible_bypass_irqs--;
+
spin_unlock_irq(&kvm->irqfds.lock);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d1e02e567b57..a986fc45145e 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1014,7 +1014,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.nested_ops = &vmx_nested_ops,
.pi_update_irte = vmx_pi_update_irte,
- .pi_start_assignment = vmx_pi_start_assignment,
+ .pi_start_bypass = vmx_pi_start_bypass,
#ifdef CONFIG_X86_64
.set_hv_timer = vt_op(set_hv_timer),
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 3a23c30f73cb..5671d59a6b6d 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -146,8 +146,13 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
static bool vmx_can_use_vtd_pi(struct kvm *kvm)
{
+ /*
+ * Note, reading the number of possible bypass IRQs can race with a
+ * bypass IRQ being attached to the VM. vmx_pi_start_bypass() ensures
+ * blockng vCPUs will see an elevated count or get KVM_REQ_UNBLOCK.
+ */
return irqchip_in_kernel(kvm) && kvm_arch_has_irq_bypass() &&
- kvm_arch_has_assigned_device(kvm);
+ READ_ONCE(kvm->arch.nr_possible_bypass_irqs);
}
/*
@@ -285,12 +290,11 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
/*
- * Bail out of the block loop if the VM has an assigned
- * device, but the blocking vCPU didn't reconfigure the
- * PI.NV to the wakeup vector, i.e. the assigned device
- * came along after the initial check in vmx_vcpu_pi_put().
+ * Kick all vCPUs when the first possible bypass IRQ is attached to a VM, as
+ * blocking vCPUs may scheduled out without reconfiguring PID.NV to the wakeup
+ * vector, i.e. if the bypass IRQ came along after vmx_vcpu_pi_put().
*/
-void vmx_pi_start_assignment(struct kvm *kvm)
+void vmx_pi_start_bypass(struct kvm *kvm)
{
if (!kvm_arch_has_irq_bypass())
return;
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 94ed66ea6249..a4af39948cf0 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -17,7 +17,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
struct kvm_vcpu *vcpu, u32 vector);
-void vmx_pi_start_assignment(struct kvm *kvm);
+void vmx_pi_start_bypass(struct kvm *kvm);
static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d3571c484790..59391cb56674 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13440,8 +13440,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
void kvm_arch_start_assignment(struct kvm *kvm)
{
- if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
- kvm_x86_call(pi_start_assignment)(kvm);
+ atomic_inc(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
--
2.50.0.rc1.591.g9c95f17f64-goog
next prev parent reply other threads:[~2025-06-11 22:48 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-11 22:45 [PATCH v3 00/62] KVM: iommu: Overhaul device posted IRQs support Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 01/62] KVM: arm64: Explicitly treat routing entry type changes as changes Sean Christopherson
2025-06-13 19:43 ` Oliver Upton
2025-06-19 12:36 ` (subset) " Marc Zyngier
2025-06-11 22:45 ` [PATCH v3 02/62] KVM: arm64: WARN if unmapping vLPI fails Sean Christopherson
2025-06-12 11:59 ` Marc Zyngier
2025-06-12 14:34 ` Sean Christopherson
2025-06-13 20:47 ` Oliver Upton
2025-06-20 17:22 ` Sean Christopherson
2025-06-20 18:00 ` David Woodhouse
2025-06-20 18:48 ` Oliver Upton
2025-06-20 19:04 ` Sean Christopherson
2025-06-20 19:27 ` Oliver Upton
2025-06-20 20:31 ` Sean Christopherson
2025-06-20 20:45 ` Oliver Upton
2025-06-11 22:45 ` [PATCH v3 03/62] KVM: Pass new routing entries and irqfd when updating IRTEs Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 04/62] KVM: SVM: Track per-vCPU IRTEs using kvm_kernel_irqfd structure Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 05/62] KVM: SVM: Delete IRTE link from previous vCPU before setting new IRTE Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 06/62] iommu/amd: KVM: SVM: Delete now-unused cached/previous GA tag fields Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 07/62] KVM: SVM: Delete IRTE link from previous vCPU irrespective of new routing Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 08/62] KVM: SVM: Drop pointless masking of default APIC base when setting V_APIC_BAR Sean Christopherson
2025-06-13 14:15 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 09/62] KVM: SVM: Drop pointless masking of kernel page pa's with AVIC HPA masks Sean Christopherson
2025-06-13 14:37 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 10/62] KVM: SVM: Add helper to deduplicate code for getting AVIC backing page Sean Christopherson
2025-06-13 14:38 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 11/62] KVM: SVM: Drop vcpu_svm's pointless avic_backing_page field Sean Christopherson
2025-06-13 14:44 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 12/62] KVM: SVM: Inhibit AVIC if ID is too big instead of rejecting vCPU creation Sean Christopherson
2025-06-17 14:25 ` Naveen N Rao
2025-06-17 16:10 ` Sean Christopherson
2025-06-18 14:33 ` Naveen N Rao
2025-06-18 20:59 ` Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 13/62] KVM: SVM: Drop redundant check in AVIC code on ID during " Sean Christopherson
2025-06-17 14:49 ` Naveen N Rao
2025-06-17 16:33 ` Sean Christopherson
2025-06-18 14:39 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 14/62] KVM: SVM: Track AVIC tables as natively sized pointers, not "struct pages" Sean Christopherson
2025-06-17 15:01 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 15/62] KVM: SVM: Drop superfluous "cache" of AVIC Physical ID entry pointer Sean Christopherson
2025-06-19 11:09 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 16/62] KVM: VMX: Move enable_ipiv knob to common x86 Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 17/62] KVM: SVM: Add enable_ipiv param, never set IsRunning if disabled Sean Christopherson
2025-06-19 11:31 ` Naveen N Rao
2025-06-19 12:01 ` Naveen N Rao
2025-06-20 14:39 ` Sean Christopherson
2025-06-23 10:45 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 18/62] KVM: SVM: Disable (x2)AVIC IPI virtualization if CPU has erratum #1235 Sean Christopherson
2025-06-23 14:05 ` Naveen N Rao
2025-06-23 15:30 ` Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 19/62] KVM: VMX: Suppress PI notifications whenever the vCPU is put Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 20/62] KVM: SVM: Add a comment to explain why avic_vcpu_blocking() ignores IRQ blocking Sean Christopherson
2025-06-23 15:54 ` Naveen N Rao
2025-06-23 16:18 ` Sean Christopherson
2025-06-25 15:28 ` Naveen N Rao
2025-06-11 22:45 ` [PATCH v3 21/62] iommu/amd: KVM: SVM: Use pi_desc_addr to derive ga_root_ptr Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 22/62] iommu/amd: KVM: SVM: Pass NULL @vcpu_info to indicate "not guest mode" Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 23/62] KVM: SVM: Stop walking list of routing table entries when updating IRTE Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 24/62] KVM: VMX: " Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 25/62] KVM: SVM: Extract SVM specific code out of get_pi_vcpu_info() Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 26/62] KVM: x86: Move IRQ routing/delivery APIs from x86.c => irq.c Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 27/62] KVM: x86: Nullify irqfd->producer after updating IRTEs Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 28/62] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 29/62] KVM: x86: Move posted interrupt tracepoint to common code Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 30/62] KVM: SVM: Clean up return handling in avic_pi_update_irte() Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 31/62] iommu: KVM: Split "struct vcpu_data" into separate AMD vs. Intel structs Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 32/62] KVM: Don't WARN if updating IRQ bypass route fails Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 33/62] KVM: Fold kvm_arch_irqfd_route_changed() into kvm_arch_update_irqfd_routing() Sean Christopherson
2025-06-13 20:50 ` Oliver Upton
2025-06-11 22:45 ` [PATCH v3 34/62] KVM: x86: Track irq_bypass_vcpu in common x86 code Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 35/62] KVM: x86: Skip IOMMU IRTE updates if there's no old or new vCPU being targeted Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 36/62] KVM: x86: Don't update IRTE entries when old and new routes were !MSI Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 37/62] KVM: SVM: Revert IRTE to legacy mode if IOMMU doesn't provide IR metadata Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 38/62] KVM: SVM: Take and hold ir_list_lock across IRTE updates in IOMMU Sean Christopherson
2025-06-17 15:42 ` Naveen N Rao
2025-12-22 9:16 ` Ankit Soni
2025-12-22 14:09 ` possible deadlock due to irq_set_thread_affinity() calling into the scheduler (was Re: [PATCH v3 38/62] KVM: SVM: Take and hold ir_list_lock across IRTE updates in IOMMU) Paolo Bonzini
2025-12-22 19:34 ` Sean Christopherson
2025-12-22 21:15 ` Paolo Bonzini
2025-12-22 22:10 ` Sean Christopherson
2025-12-23 8:59 ` Ankit Soni
2026-01-08 21:28 ` Thomas Gleixner
2026-01-08 21:53 ` Thomas Gleixner
2026-01-21 15:53 ` Paolo Bonzini
2026-01-21 18:13 ` Paolo Bonzini
2026-01-22 10:19 ` Marc Zyngier
2026-01-22 18:47 ` Thomas Gleixner
2026-01-24 7:49 ` Paolo Bonzini
2025-06-11 22:45 ` [PATCH v3 39/62] iommu/amd: Document which IRTE fields amd_iommu_update_ga() can modify Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 40/62] iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 41/62] iommu/amd: Factor out helper for manipulating IRTE GA/CPU info Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 42/62] iommu/amd: KVM: SVM: Set pCPU info in IRTE when setting vCPU affinity Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 43/62] iommu/amd: KVM: SVM: Add IRTE metadata to affined vCPU's list if AVIC is inhibited Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 44/62] KVM: SVM: Don't check for assigned device(s) when updating affinity Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 45/62] KVM: SVM: Don't check for assigned device(s) when activating AVIC Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 46/62] KVM: SVM: WARN if (de)activating guest mode in IOMMU fails Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 47/62] KVM: SVM: Process all IRTEs on affinity change even if one update fails Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 48/62] KVM: SVM: WARN if updating IRTE GA fields in IOMMU fails Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 49/62] KVM: x86: Drop superfluous "has assigned device" check in kvm_pi_update_irte() Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 50/62] KVM: x86: WARN if IRQ bypass isn't supported " Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 51/62] KVM: x86: WARN if IRQ bypass routing is updated without in-kernel local APIC Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 52/62] KVM: SVM: WARN if ir_list is non-empty at vCPU free Sean Christopherson
2025-06-11 22:45 ` Sean Christopherson [this message]
2025-06-11 22:45 ` [PATCH v3 54/62] KVM: VMX: WARN if VT-d Posted IRQs aren't possible when starting IRQ bypass Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 55/62] KVM: SVM: Use vcpu_idx, not vcpu_id, for GA log tag/metadata Sean Christopherson
2025-06-11 22:45 ` [PATCH v3 56/62] iommu/amd: WARN if KVM calls GA IRTE helpers without virtual APIC support Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 57/62] KVM: SVM: Fold avic_set_pi_irte_mode() into its sole caller Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 58/62] KVM: SVM: Don't check vCPU's blocking status when toggling AVIC on/off Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 59/62] KVM: SVM: Consolidate IRTE update " Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 60/62] iommu/amd: KVM: SVM: Allow KVM to control need for GA log interrupts Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 61/62] KVM: SVM: Generate GA log IRQs only if the associated vCPUs is blocking Sean Christopherson
2025-06-11 22:46 ` [PATCH v3 62/62] KVM: x86: Rename kvm_set_msi_irq() => kvm_msi_to_lapic_irq() Sean Christopherson
2025-06-24 19:38 ` [PATCH v3 00/62] KVM: iommu: Overhaul device posted IRQs support Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250611224604.313496-55-seanjc@google.com \
--to=seanjc@google.com \
--cc=baolu.lu@linux.intel.com \
--cc=dmatlack@google.com \
--cc=dwmw2@infradead.org \
--cc=francescolavra.fl@gmail.com \
--cc=iommu@lists.linux.dev \
--cc=joao.m.martins@oracle.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=mlevitsk@redhat.com \
--cc=oliver.upton@linux.dev \
--cc=pbonzini@redhat.com \
--cc=sarunkod@amd.com \
--cc=vasant.hegde@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox