All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Sasha Levin <levinsasha928@gmail.com>
Cc: kvm@vger.kernel.org, Avi Kivity <avi@redhat.com>,
	Ingo Molnar <mingo@elte.hu>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Pekka Enberg <penberg@kernel.org>
Subject: Re: [PATCH] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_PIPE
Date: Mon, 4 Jul 2011 13:32:07 +0300	[thread overview]
Message-ID: <20110704103207.GA11386@redhat.com> (raw)
In-Reply-To: <1309712689-4290-1-git-send-email-levinsasha928@gmail.com>

On Sun, Jul 03, 2011 at 08:04:49PM +0300, Sasha Levin wrote:
> The new flag allows passing a write side of a pipe instead of an
> eventfd to be notified of writes to the specified memory region.
> 
> Instead of signaling an event, the value written to the memory region
> is written to the pipe.
> 
> Using a pipe instead of an eventfd is usefull when any value can be
> written to the memory region but we're interested in recieving the
> actual value instead of just a notification.
> 
> A simple example for practical use is the serial port. we are not
> interested in an exit every time a char is written to the port, but
> we do need to know what was written so we could handle it on the guest.

Looking at this example, how would you handle a pipe full condition?
We can't buffer unlimited amount of data in the host.

> Cc: Avi Kivity <avi@redhat.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Marcelo Tosatti <mtosatti@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Pekka Enberg <penberg@kernel.org>
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> ---
>  include/linux/kvm.h |    2 +
>  virt/kvm/eventfd.c  |   65 +++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 47 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 55ef181..548f23a 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -387,12 +387,14 @@ enum {
>  	kvm_ioeventfd_flag_nr_datamatch,
>  	kvm_ioeventfd_flag_nr_pio,
>  	kvm_ioeventfd_flag_nr_deassign,
> +	kvm_ioeventfd_flag_nr_pipe,
>  	kvm_ioeventfd_flag_nr_max,
>  };
>  
>  #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
>  #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
>  #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
> +#define KVM_IOEVENTFD_FLAG_PIPE      (1 << kvm_ioeventfd_flag_nr_pipe)
>  
>  #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
>  
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..434293e 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -413,10 +413,11 @@ module_exit(irqfd_module_exit);
>  
>  /*
>   * --------------------------------------------------------------------
> - * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
> + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
> + *            a pipe write.
>   *
> - * userspace can register a PIO/MMIO address with an eventfd for receiving
> - * notification when the memory has been touched.
> + * userspace can register a PIO/MMIO address with an eventfd or a 
> + * pipe for receiving notification when the memory has been touched.
>   * --------------------------------------------------------------------
>   */
>  
> @@ -424,6 +425,7 @@ struct _ioeventfd {
>  	struct list_head     list;
>  	u64                  addr;
>  	int                  length;
> +	struct file         *pipe;
>  	struct eventfd_ctx  *eventfd;
>  	u64                  datamatch;
>  	struct kvm_io_device dev;
> @@ -439,7 +441,11 @@ to_ioeventfd(struct kvm_io_device *dev)
>  static void
>  ioeventfd_release(struct _ioeventfd *p)
>  {
> -	eventfd_ctx_put(p->eventfd);
> +	if (p->eventfd)
> +		eventfd_ctx_put(p->eventfd);
> +	else
> +		fput(p->pipe);
> +
>  	list_del(&p->list);
>  	kfree(p);
>  }
> @@ -481,6 +487,21 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
>  	return _val == p->datamatch ? true : false;
>  }
>  
> +static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> +			    loff_t pos)
> +{
> +	mm_segment_t old_fs;
> +	ssize_t res;
> +
> +	old_fs = get_fs();
> +	set_fs(get_ds());
> +	/* The cast to a user pointer is valid due to the set_fs() */

Interesting. Is buf really always a user pointer?
Why don't we tag it __user then?

> +	res = vfs_write(file, (const char __user *)buf, count, &pos);

If pipe is non-blocking, or if we get a signal,
this might fail or return a value < len.
Data will be lost then, won't it?

> +	set_fs(old_fs);
> +
> +	return res;
> +}
> +
>  /* MMIO/PIO writes trigger an event if the addr/val match */
>  static int
>  ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
> @@ -491,7 +512,11 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
>  	if (!ioeventfd_in_range(p, addr, len, val))
>  		return -EOPNOTSUPP;
>  
> -	eventfd_signal(p->eventfd, 1);
> +	if (p->pipe)
> +		kernel_write(p->pipe, val, len, 0);
> +	else
> +		eventfd_signal(p->eventfd, 1);
> +
>  	return 0;
>  }
>  
> @@ -533,7 +558,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>  	struct _ioeventfd        *p;
> -	struct eventfd_ctx       *eventfd;
> +	struct eventfd_ctx       *eventfd = NULL;
>  	int                       ret;
>  
>  	/* must be natural-word sized */
> @@ -555,9 +580,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
>  		return -EINVAL;
>  
> -	eventfd = eventfd_ctx_fdget(args->fd);
> -	if (IS_ERR(eventfd))
> -		return PTR_ERR(eventfd);
> +	if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE)) {
> +		eventfd = eventfd_ctx_fdget(args->fd);
> +		if (IS_ERR(eventfd))
> +			return PTR_ERR(eventfd);
> +	}
>  
>  	p = kzalloc(sizeof(*p), GFP_KERNEL);
>  	if (!p) {
> @@ -568,7 +595,11 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	INIT_LIST_HEAD(&p->list);
>  	p->addr    = args->addr;
>  	p->length  = args->len;
> -	p->eventfd = eventfd;
> +
> +	if (args->flags & KVM_IOEVENTFD_FLAG_PIPE)
> +		p->pipe = fget(args->fd);

This really needs to check that the fd is a pipe.
Otherwise you can do weird things like pass in
the kvm device fd itself.


> +	else
> +		p->eventfd = eventfd;
>  
>  	/* The datamatch feature is optional, otherwise this is a wildcard */
>  	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
> @@ -601,7 +632,9 @@ unlock_fail:
>  
>  fail:
>  	kfree(p);
> -	eventfd_ctx_put(eventfd);
> +
> +	if (!(args->flags & KVM_IOEVENTFD_FLAG_PIPE))
> +		eventfd_ctx_put(eventfd);
>  
>  	return ret;
>  }
> @@ -612,20 +645,14 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
>  	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
>  	struct _ioeventfd        *p, *tmp;
> -	struct eventfd_ctx       *eventfd;
>  	int                       ret = -ENOENT;
>  
> -	eventfd = eventfd_ctx_fdget(args->fd);
> -	if (IS_ERR(eventfd))
> -		return PTR_ERR(eventfd);
> -
>  	mutex_lock(&kvm->slots_lock);
>  
>  	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
>  		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
>  
> -		if (p->eventfd != eventfd  ||
> -		    p->addr != args->addr  ||
> +		if (p->addr != args->addr  ||
>  		    p->length != args->len ||
>  		    p->wildcard != wildcard)
>  			continue;
> @@ -641,8 +668,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  
>  	mutex_unlock(&kvm->slots_lock);
>  
> -	eventfd_ctx_put(eventfd);
> -

Don't we need to put on deassign?

>  	return ret;
>  }
>  
> -- 
> 1.7.6

  parent reply	other threads:[~2011-07-04 10:31 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-03 17:04 [PATCH] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_PIPE Sasha Levin
2011-07-03 17:16 ` Avi Kivity
2011-07-03 17:44   ` Sasha Levin
2011-07-03 17:57     ` Pekka Enberg
2011-07-04 10:27     ` Avi Kivity
2011-07-04 10:49       ` Michael S. Tsirkin
2011-07-04 10:57         ` Avi Kivity
2011-07-04 14:38       ` Sasha Levin
2011-07-04 14:45         ` Avi Kivity
2011-07-04 14:52           ` Sasha Levin
2011-07-04 14:59             ` Avi Kivity
2011-07-06  4:37               ` Sasha Levin
2011-07-06 11:30                 ` Avi Kivity
2011-07-04 10:32 ` Michael S. Tsirkin [this message]
2011-07-04 10:45   ` Avi Kivity
2011-07-04 11:07     ` Michael S. Tsirkin
2011-07-04 11:19       ` Avi Kivity
2011-07-04 11:45         ` Michael S. Tsirkin
2011-07-04 11:49           ` Avi Kivity
2011-07-04 12:12             ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110704103207.GA11386@redhat.com \
    --to=mst@redhat.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=levinsasha928@gmail.com \
    --cc=mingo@elte.hu \
    --cc=mtosatti@redhat.com \
    --cc=penberg@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.