From: Jan Kiszka <jan.kiszka@web.de>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: avi@redhat.com, mtosatti@redhat.com, kvm@vger.kernel.org, mst@redhat.com
Subject: Re: [RFC PATCH] kvm: Extend irqfd to support level interrupts
Date: Sun, 17 Jun 2012 13:17:53 +0200 [thread overview]
Message-ID: <4FDDBCE1.7030909@web.de> (raw)
In-Reply-To: <20120616163230.15204.61075.stgit@bling.home>
[-- Attachment #1: Type: text/plain, Size: 10002 bytes --]
On 2012-06-16 18:34, Alex Williamson wrote:
> I'm looking for opinions on this approach. For vfio device assignment
> we minimally need a way to get EOIs from the in-kernel irqchip out to
> userspace. Getting that out via an eventfd would allow us to bounce
> all level interrupts out to userspace, where we would de-assert the
> device interrupt in qemu and unmask the physical device. Ideally we
> could deassert the interrupt in KVM, which allows us to send the EOI
> directly to vfio. To do that, we need to use a new IRQ source ID so
> the guest sees the logical OR of qemu requested state and external
> device state. This allows external devices to share interrupts with
> emulated devices, just like KVM assignment. That means the means we
> also need to use a new source ID when injecting the interrupt via
> irqfd.
>
> Rather than creating a source ID allocation interface, extending irqfd
> to support a user supplied source ID, and creating another new
> interface to get the EOI out, I think it works out better to bundle
> these all together as just a level irqfd interface. This way we don't
> allow users to create unbalanced states where a level interrupt is
> asserted, but has no way of being deasserted. I believe the below
> does this, but needs testing and validation with an implementation in
> qemu.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>
> arch/x86/kvm/x86.c | 1 +
> include/linux/kvm.h | 6 +++
> include/linux/kvm_host.h | 4 +-
> virt/kvm/eventfd.c | 90 ++++++++++++++++++++++++++++++++++++++--------
> virt/kvm/kvm_main.c | 2 +
> 5 files changed, 84 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a01a424..80bed07 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2148,6 +2148,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> case KVM_CAP_GET_TSC_KHZ:
> case KVM_CAP_PCI_2_3:
> case KVM_CAP_KVMCLOCK_CTRL:
> + case KVM_CAP_IRQFD_LEVEL:
> r = 1;
> break;
> case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2ce09aa..cca49a1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_GET_SMMU_INFO 78
> #define KVM_CAP_S390_COW 79
> #define KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_IRQFD_LEVEL 81
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -683,12 +684,15 @@ struct kvm_xen_hvm_config {
> #endif
>
> #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> +/* Available with KVM_CAP_IRQFD_LEVEL */
> +#define KVM_IRQFD_FLAG_LEVEL (1 << 1)
>
> struct kvm_irqfd {
> __u32 fd;
> __u32 gsi;
> __u32 flags;
> - __u8 pad[20];
> + __u32 eoi_fd;
> + __u8 pad[16];
> };
>
> struct kvm_clock_data {
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 27ac8a4..ae3b426 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -824,7 +824,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
> #ifdef CONFIG_HAVE_KVM_EVENTFD
>
> void kvm_eventfd_init(struct kvm *kvm);
> -int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
> +int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
> void kvm_irqfd_release(struct kvm *kvm);
> void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
> int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
> @@ -833,7 +833,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
>
> static inline void kvm_eventfd_init(struct kvm *kvm) {}
>
> -static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> +static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
> {
> return -EINVAL;
> }
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f59c1e8..89a6fa9 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -49,9 +49,13 @@ struct _irqfd {
> wait_queue_t wait;
> /* Update side is protected by irqfds.lock */
> struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> - /* Used for level IRQ fast-path */
> + /* Used for IRQ fast-path */
> int gsi;
> struct work_struct inject;
> + /* Used for level EOI path */
> + int irq_source_id;
> + struct eventfd_ctx *eoi_eventfd;
> + struct kvm_irq_ack_notifier notifier;
> /* Used for setup/shutdown */
> struct eventfd_ctx *eventfd;
> struct list_head list;
> @@ -62,7 +66,7 @@ struct _irqfd {
> static struct workqueue_struct *irqfd_cleanup_wq;
>
> static void
> -irqfd_inject(struct work_struct *work)
> +irqfd_inject_edge(struct work_struct *work)
> {
> struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> struct kvm *kvm = irqfd->kvm;
> @@ -71,6 +75,23 @@ irqfd_inject(struct work_struct *work)
> kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> }
>
> +static void
> +irqfd_inject_level(struct work_struct *work)
> +{
> + struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> +
> + kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 1);
> +}
> +
> +static void
> +irqfd_ack_level(struct kvm_irq_ack_notifier *notifier)
> +{
> + struct _irqfd *irqfd = container_of(notifier, struct _irqfd, notifier);
> +
> + kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 0);
> + eventfd_signal(irqfd->eoi_eventfd, 1);
> +}
> +
> /*
> * Race-free decouple logic (ordering is critical)
> */
> @@ -96,6 +117,14 @@ irqfd_shutdown(struct work_struct *work)
> * It is now safe to release the object's resources
> */
> eventfd_ctx_put(irqfd->eventfd);
> +
> + if (irqfd->eoi_eventfd) {
> + kvm_unregister_irq_ack_notifier(irqfd->kvm, &irqfd->notifier);
> + eventfd_ctx_put(irqfd->eoi_eventfd);
> + kvm_set_irq(irqfd->kvm, irqfd->irq_source_id, irqfd->gsi, 0);
> + kvm_free_irq_source_id(irqfd->kvm, irqfd->irq_source_id);
> + }
> +
> kfree(irqfd);
> }
>
> @@ -198,13 +227,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
> }
>
> static int
> -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> +kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
> {
> struct kvm_irq_routing_table *irq_rt;
> struct _irqfd *irqfd, *tmp;
> struct file *file = NULL;
> - struct eventfd_ctx *eventfd = NULL;
> - int ret;
> + struct eventfd_ctx *eventfd = NULL, *eoi_eventfd = NULL;
> + int ret, irq_source_id = -1;
> unsigned int events;
>
> irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
> @@ -212,12 +241,35 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> return -ENOMEM;
>
> irqfd->kvm = kvm;
> - irqfd->gsi = gsi;
> + irqfd->gsi = args->gsi;
> INIT_LIST_HEAD(&irqfd->list);
> - INIT_WORK(&irqfd->inject, irqfd_inject);
> +
> + if (args->flags & KVM_IRQFD_FLAG_LEVEL) {
> + irq_source_id = kvm_request_irq_source_id(kvm);
> + if (irq_source_id < 0) {
> + ret = irq_source_id;
> + goto fail;
> + }
> +
> + eoi_eventfd = eventfd_ctx_fdget(args->eoi_fd);
> + if (IS_ERR(eoi_eventfd)) {
> + ret = PTR_ERR(eoi_eventfd);
> + goto fail;
> + }
> +
> + irqfd->irq_source_id = irq_source_id;
> + irqfd->eoi_eventfd = eoi_eventfd;
> + irqfd->notifier.gsi = args->gsi;
> + irqfd->notifier.irq_acked = irqfd_ack_level;
> + kvm_register_irq_ack_notifier(kvm, &irqfd->notifier);
> +
> + INIT_WORK(&irqfd->inject, irqfd_inject_level);
> + } else
> + INIT_WORK(&irqfd->inject, irqfd_inject_edge);
> +
> INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
>
> - file = eventfd_fget(fd);
> + file = eventfd_fget(args->fd);
> if (IS_ERR(file)) {
> ret = PTR_ERR(file);
> goto fail;
> @@ -282,6 +334,14 @@ fail:
> if (!IS_ERR(file))
> fput(file);
>
> + if (eoi_eventfd && !IS_ERR(eoi_eventfd)) {
> + kvm_unregister_irq_ack_notifier(kvm, &irqfd->notifier);
> + eventfd_ctx_put(eoi_eventfd);
> + }
> +
> + if (irq_source_id >= 0)
> + kvm_free_irq_source_id(kvm, irq_source_id);
> +
> kfree(irqfd);
> return ret;
> }
> @@ -298,19 +358,19 @@ kvm_eventfd_init(struct kvm *kvm)
> * shutdown any irqfd's that match fd+gsi
> */
> static int
> -kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> +kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
> {
> struct _irqfd *irqfd, *tmp;
> struct eventfd_ctx *eventfd;
>
> - eventfd = eventfd_ctx_fdget(fd);
> + eventfd = eventfd_ctx_fdget(args->fd);
> if (IS_ERR(eventfd))
> return PTR_ERR(eventfd);
>
> spin_lock_irq(&kvm->irqfds.lock);
>
> list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> - if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> + if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
> /*
> * This rcu_assign_pointer is needed for when
> * another thread calls kvm_irq_routing_update before
> @@ -338,12 +398,12 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> }
>
> int
> -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> +kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
> {
> - if (flags & KVM_IRQFD_FLAG_DEASSIGN)
> - return kvm_irqfd_deassign(kvm, fd, gsi);
> + if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
> + return kvm_irqfd_deassign(kvm, args);
>
> - return kvm_irqfd_assign(kvm, fd, gsi);
> + return kvm_irqfd_assign(kvm, args);
> }
>
> /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 02cb440..b4ad14cc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2059,7 +2059,7 @@ static long kvm_vm_ioctl(struct file *filp,
> r = -EFAULT;
> if (copy_from_user(&data, argp, sizeof data))
> goto out;
> - r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
> + r = kvm_irqfd(kvm, &data);
> break;
> }
> case KVM_IOEVENTFD: {
>
Looks reasonable to me.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
next prev parent reply other threads:[~2012-06-17 11:23 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-16 16:34 [RFC PATCH] kvm: Extend irqfd to support level interrupts Alex Williamson
2012-06-17 11:17 ` Jan Kiszka [this message]
2012-06-17 18:44 ` Michael S. Tsirkin
2012-06-17 21:38 ` Alex Williamson
2012-06-17 22:15 ` Alex Williamson
2012-06-18 6:00 ` Michael S. Tsirkin
2012-06-18 14:00 ` Alex Williamson
2012-06-18 5:51 ` Michael S. Tsirkin
2012-06-18 14:06 ` Alex Williamson
2012-06-18 8:02 ` Avi Kivity
2012-06-18 8:52 ` Jan Kiszka
2012-06-18 9:33 ` Avi Kivity
2012-06-18 10:11 ` Michael S. Tsirkin
2012-06-18 10:14 ` Avi Kivity
2012-06-18 10:55 ` Michael S. Tsirkin
2012-06-18 11:03 ` Avi Kivity
2012-06-18 11:17 ` Michael S. Tsirkin
2012-06-18 14:32 ` Alex Williamson
2012-06-18 14:27 ` Alex Williamson
2012-06-18 14:33 ` Avi Kivity
2012-06-18 16:47 ` Alex Williamson
2012-06-18 14:23 ` Alex Williamson
2012-06-18 14:18 ` Alex Williamson
2012-06-18 14:35 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FDDBCE1.7030909@web.de \
--to=jan.kiszka@web.de \
--cc=alex.williamson@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).