public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <levinsasha928@gmail.com>
To: kvm@vger.kernel.org
Cc: Sasha Levin <levinsasha928@gmail.com>,
	Avi Kivity <avi@redhat.com>, Ingo Molnar <mingo@elte.hu>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Pekka Enberg <penberg@kernel.org>
Subject: [PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET
Date: Wed,  6 Jul 2011 07:37:58 +0300	[thread overview]
Message-ID: <1309927078-5983-5-git-send-email-levinsasha928@gmail.com> (raw)
In-Reply-To: <1309927078-5983-1-git-send-email-levinsasha928@gmail.com>

The new flag allows passing a connected socket instead of an
eventfd to be notified of writes or reads to the specified memory region.

Instead of signaling an event, on write the value written to the memory
region is written to the socket.
On read - a notification of the read is sent to the host, and a response
is expected with the value to be 'read'.

Using a socket instead of an eventfd is useful when any value can be
written to the memory region but we're interested in receiving the
actual value instead of just a notification.

A simple example for practical use is the serial port. We are not
interested in an exit every time a char is written to the port, but
we do need to know what was written so we can handle it on the guest.

Cc: Avi Kivity <avi@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 Documentation/virtual/kvm/api.txt |   18 ++++-
 include/linux/kvm.h               |    9 ++
 virt/kvm/eventfd.c                |  153 ++++++++++++++++++++++++++++++++-----
 3 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 317d86a..74f0946 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1330,7 +1330,7 @@ Returns: 0 on success, !0 on error
 
 This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
 within the guest.  A guest write in the registered address will signal the
-provided event instead of triggering an exit.
+provided event or write to the provided socket instead of triggering an exit.
 
 struct kvm_ioeventfd {
 	__u64 datamatch;
@@ -1341,6 +1341,13 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
@@ -1348,6 +1355,7 @@ The following flags are defined:
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
@@ -1359,6 +1367,14 @@ passed in datamatch.
 If the nowrite flag is set, the event won't be signaled when the specified address
 is being written to.
 
+If the socket flag is set, fd is expected to be a connected AF_UNIX
+SOCK_SEQPACKET socket. Once a guest write in the registered address is
+detected - a struct kvm_ioeventfd_data which describes the write will be
+written to the socket.
+On read, a struct kvm_ioeventfd_data with 'is_write = 0' will be written to
+the socket, and a response is then awaited: a struct kvm_ioeventfd_data
+containing the value which should be 'read' by the guest.
+
 
 5. The kvm_run structure
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a12711..ff3d808 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -389,6 +389,7 @@ enum {
 	kvm_ioeventfd_flag_nr_deassign,
 	kvm_ioeventfd_flag_nr_read,
 	kvm_ioeventfd_flag_nr_nowrite,
+	kvm_ioeventfd_flag_nr_socket,
 	kvm_ioeventfd_flag_nr_max,
 };
 
@@ -397,6 +398,7 @@ enum {
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
@@ -409,6 +411,13 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+	__u64 data;
+	__u64 addr;
+	__u32 len;
+	__u8  is_write;
+};
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
 	/* in */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 5f2d203..d1d63b3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/net.h>
 
 #include "iodev.h"
 
@@ -413,10 +414,11 @@ module_exit(irqfd_module_exit);
 
 /*
  * --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
+ *            a socket write.
  *
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
+ * userspace can register a PIO/MMIO address with an eventfd or a
+ * socket for receiving notification when the memory has been touched.
  * --------------------------------------------------------------------
  */
 
@@ -424,7 +426,10 @@ struct _ioeventfd {
 	struct list_head     list;
 	u64                  addr;
 	int                  length;
-	struct eventfd_ctx  *eventfd;
+	union {
+		struct socket       *sock;
+		struct eventfd_ctx  *eventfd;
+	};
 	u64                  datamatch;
 	struct kvm_io_device dev;
 	bool                 wildcard;
@@ -441,7 +446,11 @@ to_ioeventfd(struct kvm_io_device *dev)
 static void
 ioeventfd_release(struct _ioeventfd *p)
 {
-	eventfd_ctx_put(p->eventfd);
+	if (p->eventfd)
+		eventfd_ctx_put(p->eventfd);
+	else
+		sockfd_put(p->sock);
+
 	list_del(&p->list);
 	kfree(p);
 }
@@ -510,12 +519,65 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 	return _val == p->datamatch ? true : false;
 }
 
+static ssize_t socket_write(struct socket *sock, const void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_sendmsg(sock, &msg, count);
+	set_fs(old_fs);
+
+	return res;
+}
+
+static ssize_t socket_read(struct socket *sock, void *buf, size_t count)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+	struct msghdr msg;
+	struct iovec iov;
+
+	iov = (struct iovec) {
+		.iov_base = (void *)buf,
+		.iov_len  = count,
+	};
+
+	msg = (struct msghdr) {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = sock_recvmsg(sock, &msg, count, 0);
+	set_fs(old_fs);
+
+	return res;
+}
+
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
 		const void *val)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;
 
 	/* Exit if signaling on writes isn't requested */
 	if (!p->track_writes)
@@ -524,7 +586,18 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
 	if (!ioeventfd_in_range(p, addr, len, val))
 		return -EOPNOTSUPP;
 
-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.data = get_val(val, len),
+		.addr = addr,
+		.len = len,
+		.is_write = 1,
+	};
+
+	if (p->sock)
+		socket_write(p->sock, &data, sizeof(data));
+	else
+		eventfd_signal(p->eventfd, 1);
+
 	return 0;
 }
 
@@ -534,6 +607,7 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
 		void *val)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
+	struct kvm_ioeventfd_data data;
 
 	/* Exit if signaling on reads isn't requested */
 	if (!p->track_reads)
@@ -542,7 +616,21 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
 	if (!ioeventfd_in_range(p, addr, len, val))
 		return -EOPNOTSUPP;
 
-	eventfd_signal(p->eventfd, 1);
+	data = (struct kvm_ioeventfd_data) {
+		.addr = addr,
+		.len = len,
+		.is_write = 0,
+	};
+
+	if (p->sock) {
+		socket_write(p->sock, &data, sizeof(data));
+		socket_read(p->sock, &data, sizeof(data));
+		set_val(val, len, data.data);
+	} else {
+		set_val(val, len, p->datamatch);
+		eventfd_signal(p->eventfd, 1);
+	}
+
 	return 0;
 }
 
@@ -585,7 +673,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
 	int                       ret;
 
 	/* check for range overflow */
@@ -596,10 +684,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
 		return -EINVAL;
 
-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
-
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
 		ret = -ENOMEM;
@@ -611,6 +695,20 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	p->length  = args->len;
 	p->eventfd = eventfd;
 
+	if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		p->sock = sockfd_lookup(args->fd, &ret);
+		if (ret)
+			goto fail;
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			goto fail;
+
+		p->eventfd = eventfd;
+	}
+
 	/* The datamatch feature is optional, otherwise this is a wildcard */
 	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
 		p->datamatch = args->datamatch;
@@ -649,8 +747,14 @@ unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
 
 fail:
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+
+	if (p->sock)
+		sockfd_put(p->sock);
+
+
 	kfree(p);
-	eventfd_ctx_put(eventfd);
 
 	return ret;
 }
@@ -661,12 +765,21 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p, *tmp;
-	struct eventfd_ctx       *eventfd;
+	struct eventfd_ctx       *eventfd = NULL;
+	struct socket            *sock = NULL;
 	int                       ret = -ENOENT;
 
-	eventfd = eventfd_ctx_fdget(args->fd);
-	if (IS_ERR(eventfd))
-		return PTR_ERR(eventfd);
+	if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+		ret = 0;
+		sock = sockfd_lookup(args->fd, &ret);
+		if (ret)
+			return PTR_ERR(sock);
+	} else {
+		ret = -EINVAL;
+		eventfd = eventfd_ctx_fdget(args->fd);
+		if (IS_ERR(eventfd))
+			return PTR_ERR(eventfd);
+	}
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -674,6 +787,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
 		if (p->eventfd != eventfd  ||
+		    p->sock != sock        ||
 		    p->addr != args->addr  ||
 		    p->length != args->len ||
 		    p->wildcard != wildcard)
@@ -690,7 +804,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	mutex_unlock(&kvm->slots_lock);
 
-	eventfd_ctx_put(eventfd);
+	if (eventfd)
+		eventfd_ctx_put(eventfd);
+	if (sock)
+		sockfd_put(sock);
 
 	return ret;
 }
-- 
1.7.6


  parent reply	other threads:[~2011-07-06  4:38 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-06  4:37 [PATCH 1/5] ioeventfd: Remove natural sized length limitation Sasha Levin
2011-07-06  4:37 ` [PATCH 2/5] ioeventfd: Add helper functions for reading and writing Sasha Levin
2011-07-06  4:37 ` [PATCH 3/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_READ Sasha Levin
2011-07-06  4:37 ` [PATCH 4/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_NOWRITE Sasha Levin
2011-07-06  4:37 ` Sasha Levin [this message]
2011-07-06 11:42   ` [PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET Michael S. Tsirkin
2011-07-06 15:01     ` Sasha Levin
2011-07-06 17:58       ` Michael S. Tsirkin
2011-07-10  5:34         ` Sasha Levin
2011-07-10  8:05           ` Michael S. Tsirkin
2011-07-12 11:23             ` Sasha Levin
2011-07-12 11:26               ` Avi Kivity
2011-07-13  6:37                 ` Pekka Enberg
2011-07-13  6:45                   ` Pekka Enberg
2011-07-13  7:07                     ` Avi Kivity
2011-07-13  8:02                       ` Pekka Enberg
2011-07-13 12:57                         ` Avi Kivity
2011-07-13 13:00                           ` Pekka Enberg
2011-07-13 13:32                             ` Avi Kivity
2011-07-14  7:26                               ` Pekka Enberg
2011-07-14  8:07                                 ` Sasha Levin
2011-07-14  8:09                                 ` Avi Kivity
2011-07-14  8:14                                   ` Pekka Enberg
2011-07-14  8:28                                     ` Avi Kivity
2011-07-14  8:59                                       ` Pekka Enberg
2011-07-14  9:48                                         ` Avi Kivity
     [not found]                                           ` <CAOJsxLHSeRuTOoiJssyrELRx-eXok3WinLr_+_G4dB+yHNBKdg@mail.gmail.com>
2011-07-14 10:30                                           ` Pekka Enberg
2011-07-14 11:54                                             ` Avi Kivity
2011-07-14 12:32                                               ` Sasha Levin
2011-07-14 12:46                                                 ` Avi Kivity
2011-07-14 13:00                                                   ` Sasha Levin
2011-07-14 13:05                                                     ` Avi Kivity
2011-07-14 13:17                                                       ` Pekka Enberg
2011-07-14 13:23                                                         ` Avi Kivity
2011-07-20  2:52                                                           ` Anthony Liguori
2011-07-20  6:16                                                             ` Sasha Levin
2011-07-20  9:42                                                               ` Pekka Enberg
2011-07-14 12:37                                               ` Pekka Enberg
2011-07-14 12:48                                                 ` Avi Kivity
2011-07-14 12:52                                                   ` Pekka Enberg
2011-07-14 12:54                                                     ` Avi Kivity
2011-07-14  8:19                                   ` Gleb Natapov
2011-07-14  8:25                                   ` Michael S. Tsirkin
2011-07-14  8:29                                     ` Avi Kivity
2011-07-20  2:49                       ` Anthony Liguori
2011-07-20  9:44                         ` Pekka Enberg
2011-07-20 21:10                           ` Anthony Liguori
2011-07-25 12:10                       ` Sasha Levin
2011-07-25 12:16                         ` Avi Kivity
2011-07-25 12:26                           ` Sasha Levin
2011-07-25 13:04                             ` Avi Kivity
2011-07-13  7:51           ` Pekka Enberg
2011-07-13 10:04             ` Pekka Enberg
2011-07-13 10:26               ` Sasha Levin
2011-07-13 10:56                 ` Pekka Enberg
2011-07-13 11:14                   ` Pekka Enberg
2011-07-06 12:39   ` Avi Kivity
2011-07-06 12:58     ` Sasha Levin
2011-07-06 13:04       ` Avi Kivity
2011-07-06 13:00   ` Avi Kivity
2011-07-20  2:42   ` Anthony Liguori
2011-07-20  8:19     ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1309927078-5983-5-git-send-email-levinsasha928@gmail.com \
    --to=levinsasha928@gmail.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=penberg@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox