From: "Michael S. Tsirkin" <mst@redhat.com>
To: Sasha Levin <levinsasha928@gmail.com>
Cc: kvm@vger.kernel.org, Avi Kivity <avi@redhat.com>,
Ingo Molnar <mingo@elte.hu>,
Marcelo Tosatti <mtosatti@redhat.com>,
Pekka Enberg <penberg@kernel.org>
Subject: Re: [PATCH] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_PIPE
Date: Mon, 4 Jul 2011 13:32:07 +0300 [thread overview]
Message-ID: <20110704103207.GA11386@redhat.com> (raw)
In-Reply-To: <1309712689-4290-1-git-send-email-levinsasha928@gmail.com>
On Sun, Jul 03, 2011 at 08:04:49PM +0300, Sasha Levin wrote:
> The new flag allows passing a write side of a pipe instead of an
> eventfd to be notified of writes to the specified memory region.
>
> Instead of signaling an event, the value written to the memory region
> is written to the pipe.
>
> Using a pipe instead of an eventfd is useful when any value can be
> written to the memory region but we're interested in receiving the
> actual value instead of just a notification.
>
> A simple example for practical use is the serial port. We are not
> interested in an exit every time a char is written to the port, but
> we do need to know what was written so we could handle it on the guest.
Looking at this example, how would you handle a pipe full condition?
We can't buffer an unlimited amount of data in the host.
> Cc: Avi Kivity <avi@redhat.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Marcelo Tosatti <mtosatti@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Pekka Enberg <penberg@kernel.org>
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> ---
> include/linux/kvm.h | 2 +
> virt/kvm/eventfd.c | 65 +++++++++++++++++++++++++++++++++++---------------
> 2 files changed, 47 insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 55ef181..548f23a 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -387,12 +387,14 @@ enum {
> kvm_ioeventfd_flag_nr_datamatch,
> kvm_ioeventfd_flag_nr_pio,
> kvm_ioeventfd_flag_nr_deassign,
> + kvm_ioeventfd_flag_nr_pipe,
> kvm_ioeventfd_flag_nr_max,
> };
>
> #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
> #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
> #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
> +#define KVM_IOEVENTFD_FLAG_PIPE (1 << kvm_ioeventfd_flag_nr_pipe)
>
> #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)
>
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..434293e 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -413,10 +413,11 @@ module_exit(irqfd_module_exit);
>
> /*
> * --------------------------------------------------------------------
> - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
> + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
> + * a pipe write.
> *
> - * userspace can register a PIO/MMIO address with an eventfd for receiving
> - * notification when the memory has been touched.
> + * userspace can register a PIO/MMIO address with an eventfd or a
> + * pipe for receiving notification when the memory has been touched.
> * --------------------------------------------------------------------
> */
>
> @@ -424,6 +425,7 @@ struct _ioeventfd {
> struct list_head list;
> u64 addr;
> int length;
> + struct file *pipe;
> struct eventfd_ctx *eventfd;
> u64 datamatch;
> struct kvm_io_device dev;
> @@ -439,7 +441,11 @@ to_ioeventfd(struct kvm_io_device *dev)
> static void
> ioeventfd_release(struct _ioeventfd *p)
> {
> - eventfd_ctx_put(p->eventfd);
> + if (p->eventfd)
> + eventfd_ctx_put(p->eventfd);
> + else
> + fput(p->pipe);
> +
> list_del(&p->list);
> kfree(p);
> }
> @@ -481,6 +487,21 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
> return _val == p->datamatch ? true : false;
> }
>
> +static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> + loff_t pos)
> +{
> + mm_segment_t old_fs;
> + ssize_t res;
> +
> + old_fs = get_fs();
> + set_fs(get_ds());
> + /* The cast to a user pointer is valid due to the set_fs() */
Interesting. Is buf really always a user pointer?
Why don't we tag it __user then?
> + res = vfs_write(file, (const char __user *)buf, count, &pos);
If the pipe is non-blocking, or if we get a signal,
this might fail or return a value < len.
Data will be lost then, won't it?
> + set_fs(old_fs);
> +
> + return res;
> +}
> +
> /* MMIO/PIO writes trigger an event if the addr/val match */
> static int
> ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
> @@ -491,7 +512,11 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
> if (!ioeventfd_in_range(p, addr, len, val))
> return -EOPNOTSUPP;
>
> - eventfd_signal(p->eventfd, 1);
> + if (p->pipe)
> + kernel_write(p->pipe, val, len, 0);
> + else
> + eventfd_signal(p->eventfd, 1);
> +
> return 0;
> }
>
> @@ -533,7 +558,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
> enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
> struct _ioeventfd *p;
> - struct eventfd_ctx *eventfd;
> + struct eventfd_ctx *eventfd = NULL;
> int ret;
>
> /* must be natural-word sized */
> @@ -555,9 +580,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
> return -EINVAL;
>
> - eventfd = eventfd_ctx_fdget(args->fd);
> - if (IS_ERR(eventfd))
> - return PTR_ERR(eventfd);
> + if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) {
> + eventfd = eventfd_ctx_fdget(args->fd);
> + if (IS_ERR(eventfd))
> + return PTR_ERR(eventfd);
> + }
>
> p = kzalloc(sizeof(*p), GFP_KERNEL);
> if (!p) {
> @@ -568,7 +595,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> INIT_LIST_HEAD(&p->list);
> p->addr = args->addr;
> p->length = args->len;
> - p->eventfd = eventfd;
> +
> + if (args->flags & KVM_IOEVENTFD_FLAG_PIPE)
> + p->pipe = fget(args->fd);
This really needs to check that the fd is a pipe.
Otherwise you can do weird things like pass in
the kvm device fd itself.
> + else
> + p->eventfd = eventfd;
>
> /* The datamatch feature is optional, otherwise this is a wildcard */
> if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
> @@ -601,7 +632,9 @@ unlock_fail:
>
> fail:
> kfree(p);
> - eventfd_ctx_put(eventfd);
> +
> + if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE))
> + eventfd_ctx_put(eventfd);
>
> return ret;
> }
> @@ -612,20 +645,14 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
> enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
> struct _ioeventfd *p, *tmp;
> - struct eventfd_ctx *eventfd;
> int ret = -ENOENT;
>
> - eventfd = eventfd_ctx_fdget(args->fd);
> - if (IS_ERR(eventfd))
> - return PTR_ERR(eventfd);
> -
> mutex_lock(&kvm->slots_lock);
>
> list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
> bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
>
> - if (p->eventfd != eventfd ||
> - p->addr != args->addr ||
> + if (p->addr != args->addr ||
> p->length != args->len ||
> p->wildcard != wildcard)
> continue;
> @@ -641,8 +668,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>
> mutex_unlock(&kvm->slots_lock);
>
> - eventfd_ctx_put(eventfd);
> -
Don't we need to put on deassign?
> return ret;
> }
>
> --
> 1.7.6
next prev parent reply other threads:[~2011-07-04 10:31 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-03 17:04 [PATCH] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_PIPE Sasha Levin
2011-07-03 17:16 ` Avi Kivity
2011-07-03 17:44 ` Sasha Levin
2011-07-03 17:57 ` Pekka Enberg
2011-07-04 10:27 ` Avi Kivity
2011-07-04 10:49 ` Michael S. Tsirkin
2011-07-04 10:57 ` Avi Kivity
2011-07-04 14:38 ` Sasha Levin
2011-07-04 14:45 ` Avi Kivity
2011-07-04 14:52 ` Sasha Levin
2011-07-04 14:59 ` Avi Kivity
2011-07-06 4:37 ` Sasha Levin
2011-07-06 11:30 ` Avi Kivity
2011-07-04 10:32 ` Michael S. Tsirkin [this message]
2011-07-04 10:45 ` Avi Kivity
2011-07-04 11:07 ` Michael S. Tsirkin
2011-07-04 11:19 ` Avi Kivity
2011-07-04 11:45 ` Michael S. Tsirkin
2011-07-04 11:49 ` Avi Kivity
2011-07-04 12:12 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110704103207.GA11386@redhat.com \
--to=mst@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=levinsasha928@gmail.com \
--cc=mingo@elte.hu \
--cc=mtosatti@redhat.com \
--cc=penberg@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox