From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>,
Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Sean Christopherson <seanjc@google.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
K Prateek Nayak <kprateek.nayak@amd.com>,
David Matlack <dmatlack@google.com>,
Juergen Gross <jgross@suse.com>,
Stefano Stabellini <sstabellini@kernel.org>,
Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Subject: [PATCH v2 04/12] KVM: Add irqfd to KVM's list via the vfs_poll() callback
Date: Mon, 19 May 2025 11:55:06 -0700 [thread overview]
Message-ID: <20250519185514.2678456-5-seanjc@google.com> (raw)
In-Reply-To: <20250519185514.2678456-1-seanjc@google.com>
Add the irqfd structure to KVM's list of irqfds in kvm_irqfd_register(),
i.e. via the vfs_poll() callback. This will allow taking irqfds.lock
across the entire registration sequence (add to waitqueue, add to list),
and more importantly will allow inserting into KVM's list if and only if
adding to the waitqueue succeeds (spoiler alert), without needing to
juggle return codes in weird ways.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
virt/kvm/eventfd.c | 102 +++++++++++++++++++++++++--------------------
1 file changed, 57 insertions(+), 45 deletions(-)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 8b9a87daa2bb..99274d60335d 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -245,34 +245,14 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
return ret;
}
-struct kvm_irqfd_pt {
- struct kvm_kernel_irqfd *irqfd;
- poll_table pt;
-};
-
-static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
- poll_table *pt)
-{
- struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt);
- struct kvm_kernel_irqfd *irqfd = p->irqfd;
-
- /*
- * Add the irqfd as a priority waiter on the eventfd, with a custom
- * wake-up handler, so that KVM *and only KVM* is notified whenever the
- * underlying eventfd is signaled.
- */
- init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
-
- add_wait_queue_priority(wqh, &irqfd->wait);
-}
-
-/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
int n_entries;
+ lockdep_assert_held(&kvm->irqfds.lock);
+
n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
write_seqcount_begin(&irqfd->irq_entry_sc);
@@ -286,6 +266,49 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
write_seqcount_end(&irqfd->irq_entry_sc);
}
+struct kvm_irqfd_pt {
+ struct kvm_kernel_irqfd *irqfd;
+ struct kvm *kvm;
+ poll_table pt;
+ int ret;
+};
+
+static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
+{
+ struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt);
+ struct kvm_kernel_irqfd *irqfd = p->irqfd;
+ struct kvm_kernel_irqfd *tmp;
+ struct kvm *kvm = p->kvm;
+
+ spin_lock_irq(&kvm->irqfds.lock);
+
+ list_for_each_entry(tmp, &kvm->irqfds.items, list) {
+ if (irqfd->eventfd != tmp->eventfd)
+ continue;
+ /* This fd is used for another irq already. */
+ p->ret = -EBUSY;
+ spin_unlock_irq(&kvm->irqfds.lock);
+ return;
+ }
+
+ irqfd_update(kvm, irqfd);
+
+ list_add_tail(&irqfd->list, &kvm->irqfds.items);
+
+ spin_unlock_irq(&kvm->irqfds.lock);
+
+ /*
+ * Add the irqfd as a priority waiter on the eventfd, with a custom
+ * wake-up handler, so that KVM *and only KVM* is notified whenever the
+ * underlying eventfd is signaled.
+ */
+ init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+
+ add_wait_queue_priority(wqh, &irqfd->wait);
+ p->ret = 0;
+}
+
#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
struct irq_bypass_consumer *cons)
@@ -315,7 +338,7 @@ bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
- struct kvm_kernel_irqfd *irqfd, *tmp;
+ struct kvm_kernel_irqfd *irqfd;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
struct kvm_irqfd_pt irqfd_pt;
int ret;
@@ -414,32 +437,22 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
*/
idx = srcu_read_lock(&kvm->irq_srcu);
- spin_lock_irq(&kvm->irqfds.lock);
-
- ret = 0;
- list_for_each_entry(tmp, &kvm->irqfds.items, list) {
- if (irqfd->eventfd != tmp->eventfd)
- continue;
- /* This fd is used for another irq already. */
- ret = -EBUSY;
- goto fail_duplicate;
- }
-
- irqfd_update(kvm, irqfd);
-
- list_add_tail(&irqfd->list, &kvm->irqfds.items);
-
- spin_unlock_irq(&kvm->irqfds.lock);
-
/*
- * Register the irqfd with the eventfd by polling on the eventfd. If
- * there was en event pending on the eventfd prior to registering,
- * manually trigger IRQ injection.
+ * Register the irqfd with the eventfd by polling on the eventfd, and
+ * simultaneously add the irqfd to KVM's list. If there was an event
+ * pending on the eventfd prior to registering, manually trigger IRQ
+ * injection.
*/
irqfd_pt.irqfd = irqfd;
+ irqfd_pt.kvm = kvm;
init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register);
events = vfs_poll(fd_file(f), &irqfd_pt.pt);
+
+ ret = irqfd_pt.ret;
+ if (ret)
+ goto fail_poll;
+
if (events & EPOLLIN)
schedule_work(&irqfd->inject);
@@ -460,8 +473,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
srcu_read_unlock(&kvm->irq_srcu, idx);
return 0;
-fail_duplicate:
- spin_unlock_irq(&kvm->irqfds.lock);
+fail_poll:
srcu_read_unlock(&kvm->irq_srcu, idx);
fail:
if (irqfd->resampler)
--
2.49.0.1101.gccaa498523-goog
next prev parent reply other threads:[~2025-05-19 19:56 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-19 18:55 [PATCH v2 00/12] KVM: Make irqfd registration globally unique Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 01/12] KVM: Use a local struct to do the initial vfs_poll() on an irqfd Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 02/12] KVM: Acquire SCRU lock outside of irqfds.lock during assignment Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 03/12] KVM: Initialize irqfd waitqueue callback when adding to the queue Sean Christopherson
2025-05-19 18:55 ` Sean Christopherson [this message]
2025-05-19 18:55 ` [PATCH v2 05/12] KVM: Add irqfd to eventfd's waitqueue while holding irqfds.lock Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 06/12] sched/wait: Add a waitqueue helper for fully exclusive priority waiters Sean Christopherson
2025-05-20 19:17 ` Peter Zijlstra
2025-05-20 20:57 ` Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 07/12] KVM: Disallow binding multiple irqfds to an eventfd with a priority waiter Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 08/12] sched/wait: Drop WQ_FLAG_EXCLUSIVE from add_wait_queue_priority() Sean Christopherson
2025-05-20 19:18 ` Peter Zijlstra
2025-05-20 22:20 ` Sean Christopherson
2025-05-21 11:42 ` Peter Zijlstra
2025-05-21 14:44 ` Michael Kelley
2025-05-21 15:05 ` Sean Christopherson
2025-05-21 13:22 ` Jürgen Groß
2025-05-19 18:55 ` [PATCH v2 09/12] KVM: Drop sanity check that per-VM list of irqfds is unique Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 10/12] KVM: selftests: Assert that eventfd() succeeds in Xen shinfo test Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 11/12] KVM: selftests: Add utilities to create eventfds and do KVM_IRQFD Sean Christopherson
2025-05-19 18:55 ` [PATCH v2 12/12] KVM: selftests: Add a KVM_IRQFD test to verify uniqueness requirements Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250519185514.2678456-5-seanjc@google.com \
--to=seanjc@google.com \
--cc=dmatlack@google.com \
--cc=jgross@suse.com \
--cc=juri.lelli@redhat.com \
--cc=kprateek.nayak@amd.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=mingo@redhat.com \
--cc=oleksandr_tyshchenko@epam.com \
--cc=oliver.upton@linux.dev \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=sstabellini@kernel.org \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).