From: Sasha Levin <levinsasha928@gmail.com>
To: kvm@vger.kernel.org
Cc: Sasha Levin <levinsasha928@gmail.com>,
Avi Kivity <avi@redhat.com>, Ingo Molnar <mingo@elte.hu>,
Marcelo Tosatti <mtosatti@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Pekka Enberg <penberg@kernel.org>
Subject: [PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET
Date: Wed, 6 Jul 2011 07:37:58 +0300 [thread overview]
Message-ID: <1309927078-5983-5-git-send-email-levinsasha928@gmail.com> (raw)
In-Reply-To: <1309927078-5983-1-git-send-email-levinsasha928@gmail.com>
The new flag allows passing a connected socket instead of an
eventfd to be notified of writes or reads to the specified memory region.
Instead of signaling an event, On write - the value written to the memory
region is written to the pipe.
On read - a notification of the read is sent to the host, and a response
is expected with the value to be 'read'.
Using a socket instead of an eventfd is usefull when any value can be
written to the memory region but we're interested in recieving the
actual value instead of just a notification.
A simple example for practical use is the serial port. we are not
interested in an exit every time a char is written to the port, but
we do need to know what was written so we could handle it on the guest.
Cc: Avi Kivity <avi@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
Documentation/virtual/kvm/api.txt | 18 ++++-
include/linux/kvm.h | 9 ++
virt/kvm/eventfd.c | 153 ++++++++++++++++++++++++++++++++-----
3 files changed, 161 insertions(+), 19 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 317d86a..74f0946 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1330,7 +1330,7 @@ Returns: 0 on success, !0 on error
This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
within the guest. A guest write in the registered address will signal the
-provided event instead of triggering an exit.
+provided event or write to the provided socket instead of triggering an exit.
struct kvm_ioeventfd {
__u64 datamatch;
@@ -1341,6 +1341,13 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
+struct kvm_ioeventfd_data {
+ __u64 data;
+ __u64 addr;
+ __u32 len;
+ __u8 is_write;
+};
+
The following flags are defined:
#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
@@ -1348,6 +1355,7 @@ The following flags are defined:
#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
#define KVM_IOEVENTFD_FLAG_READ (1 << kvm_ioeventfd_flag_nr_read)
#define KVM_IOEVENTFD_FLAG_NOWRITE (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET (1 << kvm_ioeventfd_flag_nr_socket)
If datamatch flag is set, the event will be signaled only if the written value
to the registered address is equal to datamatch in struct kvm_ioeventfd.
@@ -1359,6 +1367,14 @@ passed in datamatch.
If the nowrite flag is set, the event won't be signaled when the specified address
is being written to.
+If the socket flag is set, fd is expected to be a connected AF_UNIX
+SOCK_SEQPACKET socket. Once a guest write in the registered address is
+detected - a struct kvm_ioeventfd_data which describes the write will be
+written to the socket.
+On read, struct kvm_ioeventfd_data will be written with 'is_write = 0', and
+would wait for a response with a struct kvm_ioeventfd_data containing the
+value which should be 'read' by the guest.
+
5. The kvm_run structure
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a12711..ff3d808 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -389,6 +389,7 @@ enum {
kvm_ioeventfd_flag_nr_deassign,
kvm_ioeventfd_flag_nr_read,
kvm_ioeventfd_flag_nr_nowrite,
+ kvm_ioeventfd_flag_nr_socket,
kvm_ioeventfd_flag_nr_max,
};
@@ -397,6 +398,7 @@ enum {
#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
#define KVM_IOEVENTFD_FLAG_READ (1 << kvm_ioeventfd_flag_nr_read)
#define KVM_IOEVENTFD_FLAG_NOWRITE (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET (1 << kvm_ioeventfd_flag_nr_socket)
#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)
@@ -409,6 +411,13 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
+struct kvm_ioeventfd_data {
+ __u64 data;
+ __u64 addr;
+ __u32 len;
+ __u8 is_write;
+};
+
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
/* in */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 5f2d203..d1d63b3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/net.h>
#include "iodev.h"
@@ -413,10 +414,11 @@ module_exit(irqfd_module_exit);
/*
* --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
+ * a socket write.
*
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
+ * userspace can register a PIO/MMIO address with an eventfd or a
+ * socket for receiving notification when the memory has been touched.
* --------------------------------------------------------------------
*/
@@ -424,7 +426,10 @@ struct _ioeventfd {
struct list_head list;
u64 addr;
int length;
- struct eventfd_ctx *eventfd;
+ union {
+ struct socket *sock;
+ struct eventfd_ctx *eventfd;
+ };
u64 datamatch;
struct kvm_io_device dev;
bool wildcard;
@@ -441,7 +446,11 @@ to_ioeventfd(struct kvm_io_device *dev)
static void
ioeventfd_release(struct _ioeventfd *p)
{
- eventfd_ctx_put(p->eventfd);
+ if (p->eventfd)
+ eventfd_ctx_put(p->eventfd);
+ else
+ sockfd_put(p->sock);
+
list_del(&p->list);
kfree(p);
}
@@ -510,12 +519,65 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
return _val == p->datamatch ? true : false;
}
+static ssize_t socket_write(struct socket *sock, const void *buf, size_t count)
+{
+ mm_segment_t old_fs;
+ ssize_t res;
+ struct msghdr msg;
+ struct iovec iov;
+
+ iov = (struct iovec) {
+ .iov_base = (void *)buf,
+ .iov_len = count,
+ };
+
+ msg = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = sock_sendmsg(sock, &msg, count);
+ set_fs(old_fs);
+
+ return res;
+}
+
+static ssize_t socket_read(struct socket *sock, void *buf, size_t count)
+{
+ mm_segment_t old_fs;
+ ssize_t res;
+ struct msghdr msg;
+ struct iovec iov;
+
+ iov = (struct iovec) {
+ .iov_base = (void *)buf,
+ .iov_len = count,
+ };
+
+ msg = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = sock_recvmsg(sock, &msg, count, 0);
+ set_fs(old_fs);
+
+ return res;
+}
+
/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
const void *val)
{
struct _ioeventfd *p = to_ioeventfd(this);
+ struct kvm_ioeventfd_data data;
/* Exit if signaling on writes isn't requested */
if (!p->track_writes)
@@ -524,7 +586,18 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
if (!ioeventfd_in_range(p, addr, len, val))
return -EOPNOTSUPP;
- eventfd_signal(p->eventfd, 1);
+ data = (struct kvm_ioeventfd_data) {
+ .data = get_val(val, len),
+ .addr = addr,
+ .len = len,
+ .is_write = 1,
+ };
+
+ if (p->sock)
+ socket_write(p->sock, &data, sizeof(data));
+ else
+ eventfd_signal(p->eventfd, 1);
+
return 0;
}
@@ -534,6 +607,7 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
void *val)
{
struct _ioeventfd *p = to_ioeventfd(this);
+ struct kvm_ioeventfd_data data;
/* Exit if signaling on reads isn't requested */
if (!p->track_reads)
@@ -542,7 +616,21 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int len,
if (!ioeventfd_in_range(p, addr, len, val))
return -EOPNOTSUPP;
- eventfd_signal(p->eventfd, 1);
+ data = (struct kvm_ioeventfd_data) {
+ .addr = addr,
+ .len = len,
+ .is_write = 0,
+ };
+
+ if (p->sock) {
+ socket_write(p->sock, &data, sizeof(data));
+ socket_read(p->sock, &data, sizeof(data));
+ set_val(val, len, data.data);
+ } else {
+ set_val(val, len, p->datamatch);
+ eventfd_signal(p->eventfd, 1);
+ }
+
return 0;
}
@@ -585,7 +673,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p;
- struct eventfd_ctx *eventfd;
+ struct eventfd_ctx *eventfd = NULL;
int ret;
/* check for range overflow */
@@ -596,10 +684,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
return -EINVAL;
- eventfd = eventfd_ctx_fdget(args->fd);
- if (IS_ERR(eventfd))
- return PTR_ERR(eventfd);
-
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
@@ -611,6 +695,20 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
p->length = args->len;
p->eventfd = eventfd;
+ if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+ ret = 0;
+ p->sock = sockfd_lookup(args->fd, &ret);
+ if (ret)
+ goto fail;
+ } else {
+ ret = -EINVAL;
+ eventfd = eventfd_ctx_fdget(args->fd);
+ if (IS_ERR(eventfd))
+ goto fail;
+
+ p->eventfd = eventfd;
+ }
+
/* The datamatch feature is optional, otherwise this is a wildcard */
if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
p->datamatch = args->datamatch;
@@ -649,8 +747,14 @@ unlock_fail:
mutex_unlock(&kvm->slots_lock);
fail:
+ if (eventfd)
+ eventfd_ctx_put(eventfd);
+
+ if (p->sock)
+ sockfd_put(p->sock);
+
+
kfree(p);
- eventfd_ctx_put(eventfd);
return ret;
}
@@ -661,12 +765,21 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p, *tmp;
- struct eventfd_ctx *eventfd;
+ struct eventfd_ctx *eventfd = NULL;
+ struct socket *sock = NULL;
int ret = -ENOENT;
- eventfd = eventfd_ctx_fdget(args->fd);
- if (IS_ERR(eventfd))
- return PTR_ERR(eventfd);
+ if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+ ret = 0;
+ sock = sockfd_lookup(args->fd, &ret);
+ if (ret)
+ return PTR_ERR(sock);
+ } else {
+ ret = -EINVAL;
+ eventfd = eventfd_ctx_fdget(args->fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+ }
mutex_lock(&kvm->slots_lock);
@@ -674,6 +787,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
if (p->eventfd != eventfd ||
+ p->sock != sock ||
p->addr != args->addr ||
p->length != args->len ||
p->wildcard != wildcard)
@@ -690,7 +804,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
mutex_unlock(&kvm->slots_lock);
- eventfd_ctx_put(eventfd);
+ if (eventfd)
+ eventfd_ctx_put(eventfd);
+ if (sock)
+ sockfd_put(sock);
return ret;
}
--
1.7.6
next prev parent reply other threads:[~2011-07-06 4:38 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-06 4:37 [PATCH 1/5] ioeventfd: Remove natural sized length limitation Sasha Levin
2011-07-06 4:37 ` [PATCH 2/5] ioeventfd: Add helper functions for reading and writing Sasha Levin
2011-07-06 4:37 ` [PATCH 3/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_READ Sasha Levin
2011-07-06 4:37 ` [PATCH 4/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_NOWRITE Sasha Levin
2011-07-06 4:37 ` Sasha Levin [this message]
2011-07-06 11:42 ` [PATCH 5/5] ioeventfd: Introduce KVM_IOEVENTFD_FLAG_SOCKET Michael S. Tsirkin
2011-07-06 15:01 ` Sasha Levin
2011-07-06 17:58 ` Michael S. Tsirkin
2011-07-10 5:34 ` Sasha Levin
2011-07-10 8:05 ` Michael S. Tsirkin
2011-07-12 11:23 ` Sasha Levin
2011-07-12 11:26 ` Avi Kivity
2011-07-13 6:37 ` Pekka Enberg
2011-07-13 6:45 ` Pekka Enberg
2011-07-13 7:07 ` Avi Kivity
2011-07-13 8:02 ` Pekka Enberg
2011-07-13 12:57 ` Avi Kivity
2011-07-13 13:00 ` Pekka Enberg
2011-07-13 13:32 ` Avi Kivity
2011-07-14 7:26 ` Pekka Enberg
2011-07-14 8:07 ` Sasha Levin
2011-07-14 8:09 ` Avi Kivity
2011-07-14 8:14 ` Pekka Enberg
2011-07-14 8:28 ` Avi Kivity
2011-07-14 8:59 ` Pekka Enberg
2011-07-14 9:48 ` Avi Kivity
[not found] ` <CAOJsxLHSeRuTOoiJssyrELRx-eXok3WinLr_+_G4dB+yHNBKdg@mail.gmai! l.com>
2011-07-14 10:30 ` Pekka Enberg
2011-07-14 11:54 ` Avi Kivity
2011-07-14 12:32 ` Sasha Levin
2011-07-14 12:46 ` Avi Kivity
2011-07-14 13:00 ` Sasha Levin
2011-07-14 13:05 ` Avi Kivity
2011-07-14 13:17 ` Pekka Enberg
2011-07-14 13:23 ` Avi Kivity
2011-07-20 2:52 ` Anthony Liguori
2011-07-20 6:16 ` Sasha Levin
2011-07-20 9:42 ` Pekka Enberg
2011-07-14 12:37 ` Pekka Enberg
2011-07-14 12:48 ` Avi Kivity
2011-07-14 12:52 ` Pekka Enberg
2011-07-14 12:54 ` Avi Kivity
2011-07-14 8:19 ` Gleb Natapov
2011-07-14 8:25 ` Michael S. Tsirkin
2011-07-14 8:29 ` Avi Kivity
2011-07-20 2:49 ` Anthony Liguori
2011-07-20 9:44 ` Pekka Enberg
2011-07-20 21:10 ` Anthony Liguori
2011-07-25 12:10 ` Sasha Levin
2011-07-25 12:16 ` Avi Kivity
2011-07-25 12:26 ` Sasha Levin
2011-07-25 13:04 ` Avi Kivity
2011-07-13 7:51 ` Pekka Enberg
2011-07-13 10:04 ` Pekka Enberg
2011-07-13 10:26 ` Sasha Levin
2011-07-13 10:56 ` Pekka Enberg
2011-07-13 11:14 ` Pekka Enberg
2011-07-06 12:39 ` Avi Kivity
2011-07-06 12:58 ` Sasha Levin
2011-07-06 13:04 ` Avi Kivity
2011-07-06 13:00 ` Avi Kivity
2011-07-20 2:42 ` Anthony Liguori
2011-07-20 8:19 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1309927078-5983-5-git-send-email-levinsasha928@gmail.com \
--to=levinsasha928@gmail.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=mst@redhat.com \
--cc=mtosatti@redhat.com \
--cc=penberg@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox