From: John Johnson <john.g.johnson@oracle.com>
To: qemu-devel@nongnu.org
Subject: [RFC v4 08/21] vfio-user: define socket receive functions
Date: Tue, 11 Jan 2022 16:43:44 -0800 [thread overview]
Message-ID: <a89cfd3195740dfb313d1947c0c7de583e4d0f46.1641584317.git.john.g.johnson@oracle.com> (raw)
In-Reply-To: <cover.1641584316.git.john.g.johnson@oracle.com>
Add infrastructure needed to receive incoming messages
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/user-protocol.h | 54 ++++++++
hw/vfio/user.h | 6 +
hw/vfio/pci.c | 6 +
hw/vfio/user.c | 327 ++++++++++++++++++++++++++++++++++++++++++++++++
MAINTAINERS | 1 +
5 files changed, 394 insertions(+)
create mode 100644 hw/vfio/user-protocol.h
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
new file mode 100644
index 0000000..d23877c
--- /dev/null
+++ b/hw/vfio/user-protocol.h
@@ -0,0 +1,54 @@
+#ifndef VFIO_USER_PROTOCOL_H
+#define VFIO_USER_PROTOCOL_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+typedef struct {
+ uint16_t id;
+ uint16_t command;
+ uint32_t size;
+ uint32_t flags;
+ uint32_t error_reply;
+} VFIOUserHdr;
+
+/* VFIOUserHdr commands */
+enum vfio_user_command {
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_DMA_MAP = 2,
+ VFIO_USER_DMA_UNMAP = 3,
+ VFIO_USER_DEVICE_GET_INFO = 4,
+ VFIO_USER_DEVICE_GET_REGION_INFO = 5,
+ VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6,
+ VFIO_USER_DEVICE_GET_IRQ_INFO = 7,
+ VFIO_USER_DEVICE_SET_IRQS = 8,
+ VFIO_USER_REGION_READ = 9,
+ VFIO_USER_REGION_WRITE = 10,
+ VFIO_USER_DMA_READ = 11,
+ VFIO_USER_DMA_WRITE = 12,
+ VFIO_USER_DEVICE_RESET = 13,
+ VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_MAX,
+};
+
+/* VFIOUserHdr flags */
+#define VFIO_USER_REQUEST 0x0
+#define VFIO_USER_REPLY 0x1
+#define VFIO_USER_TYPE 0xF
+
+#define VFIO_USER_NO_REPLY 0x10
+#define VFIO_USER_ERROR 0x20
+
+#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index da92862..72eefa7 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -11,6 +11,8 @@
*
*/
+#include "user-protocol.h"
+
typedef struct {
int send_fds;
int recv_fds;
@@ -27,6 +29,7 @@ enum msg_type {
typedef struct VFIOUserMsg {
QTAILQ_ENTRY(VFIOUserMsg) next;
+ VFIOUserHdr *hdr;
VFIOUserFDs *fds;
uint32_t rsize;
uint32_t id;
@@ -74,5 +77,8 @@ typedef struct VFIOProxy {
VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
void vfio_user_disconnect(VFIOProxy *proxy);
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *reqarg);
#endif /* VFIO_USER_H */
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9fd7c07..0de915d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3386,6 +3386,11 @@ type_init(register_vfio_pci_dev_type)
* vfio-user routines.
*/
+static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
+{
+
+}
+
/*
* Emulated devices don't use host hot reset
*/
@@ -3432,6 +3437,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
return;
}
vbasedev->proxy = proxy;
+ vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);
vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
vbasedev->dev = DEVICE(vdev);
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index c843f90..e1dfd5d 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -25,10 +25,26 @@
#include "sysemu/iothread.h"
#include "user.h"
+static uint64_t max_xfer_size;
static IOThread *vfio_user_iothread;
static void vfio_user_shutdown(VFIOProxy *proxy);
+static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds);
+static VFIOUserFDs *vfio_user_getfds(int numfds);
+static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg);
+static void vfio_user_recv(void *opaque);
+static int vfio_user_recv_one(VFIOProxy *proxy);
+static void vfio_user_cb(void *opaque);
+
+static void vfio_user_request(void *opaque);
+
+static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
+{
+ hdr->flags |= VFIO_USER_ERROR;
+ hdr->error_reply = err;
+}
/*
* Functions called by main, CPU, or iothread threads
@@ -40,10 +56,261 @@ static void vfio_user_shutdown(VFIOProxy *proxy)
qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, NULL, NULL);
}
+static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds)
+{
+ VFIOUserMsg *msg;
+
+ msg = QTAILQ_FIRST(&proxy->free);
+ if (msg != NULL) {
+ QTAILQ_REMOVE(&proxy->free, msg, next);
+ } else {
+ msg = g_malloc0(sizeof(*msg));
+ qemu_cond_init(&msg->cv);
+ }
+
+ msg->hdr = hdr;
+ msg->fds = fds;
+ return msg;
+}
+
+/*
+ * Recycle a message list entry to the free list.
+ */
+static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg)
+{
+ if (msg->type == VFIO_MSG_NONE) {
+ error_printf("vfio_user_recycle - freeing free msg\n");
+ return;
+ }
+
+ /* free msg buffer if no one is waiting to consume the reply */
+ if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
+ g_free(msg->hdr);
+ if (msg->fds != NULL) {
+ g_free(msg->fds);
+ }
+ }
+
+ msg->type = VFIO_MSG_NONE;
+ msg->hdr = NULL;
+ msg->fds = NULL;
+ msg->complete = false;
+ QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
+}
+
+static VFIOUserFDs *vfio_user_getfds(int numfds)
+{
+ VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
+
+ fds->fds = (int *)((char *)fds + sizeof(*fds));
+
+ return fds;
+}
+
/*
* Functions only called by iothread
*/
+static void vfio_user_recv(void *opaque)
+{
+ VFIOProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ while (vfio_user_recv_one(proxy) == 0) {
+ ;
+ }
+ }
+}
+
+/*
+ * Receive and process one incoming message.
+ *
+ * For replies, find matching outgoing request and wake any waiters.
+ * For requests, queue in incoming list and run request BH.
+ */
+static int vfio_user_recv_one(VFIOProxy *proxy)
+{
+ VFIOUserMsg *msg = NULL;
+ g_autofree int *fdp = NULL;
+ VFIOUserFDs *reqfds;
+ VFIOUserHdr hdr;
+ struct iovec iov = {
+ .iov_base = &hdr,
+ .iov_len = sizeof(hdr),
+ };
+ bool isreply = false;
+ int i, ret;
+ size_t msgleft, numfds = 0;
+ char *data = NULL;
+ char *buf = NULL;
+ Error *local_err = NULL;
+
+ /*
+ * Read header
+ */
+ ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+ &local_err);
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return ret;
+ }
+ if (ret <= 0) {
+ /* read error or other side closed connection */
+ if (ret == 0) {
+ error_setg(&local_err, "vfio_user_recv server closed socket");
+ } else {
+ error_prepend(&local_err, "vfio_user_recv");
+ }
+ goto fatal;
+ }
+ if (ret < sizeof(msg)) {
+ error_setg(&local_err, "vfio_user_recv short read of header");
+ goto fatal;
+ }
+
+ /*
+ * Validate header
+ */
+ if (hdr.size < sizeof(VFIOUserHdr)) {
+ error_setg(&local_err, "vfio_user_recv bad header size");
+ goto fatal;
+ }
+ switch (hdr.flags & VFIO_USER_TYPE) {
+ case VFIO_USER_REQUEST:
+ isreply = false;
+ break;
+ case VFIO_USER_REPLY:
+ isreply = true;
+ break;
+ default:
+ error_setg(&local_err, "vfio_user_recv unknown message type");
+ goto fatal;
+ }
+
+ /*
+ * For replies, find the matching pending request.
+ * For requests, reap incoming FDs.
+ */
+ if (isreply) {
+ QTAILQ_FOREACH(msg, &proxy->pending, next) {
+ if (hdr.id == msg->id) {
+ break;
+ }
+ }
+ if (msg == NULL) {
+ error_setg(&local_err, "vfio_user_recv unexpected reply");
+ goto err;
+ }
+ QTAILQ_REMOVE(&proxy->pending, msg, next);
+
+ /*
+ * Process any received FDs
+ */
+ if (numfds != 0) {
+ if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
+ error_setg(&local_err, "vfio_user_recv unexpected FDs");
+ goto err;
+ }
+ msg->fds->recv_fds = numfds;
+ memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
+ }
+ } else {
+ if (numfds != 0) {
+ reqfds = vfio_user_getfds(numfds);
+ memcpy(reqfds->fds, fdp, numfds * sizeof(int));
+ } else {
+ reqfds = NULL;
+ }
+ }
+
+ /*
+ * Put the whole message into a single buffer.
+ */
+ if (isreply) {
+ if (hdr.size > msg->rsize) {
+ error_setg(&local_err,
+ "vfio_user_recv reply larger than recv buffer");
+ goto err;
+ }
+ *msg->hdr = hdr;
+ data = (char *)msg->hdr + sizeof(hdr);
+ } else {
+ if (hdr.size > max_xfer_size) {
+ error_setg(&local_err, "vfio_user_recv request larger than max");
+ goto err;
+ }
+ buf = g_malloc0(hdr.size);
+ memcpy(buf, &hdr, sizeof(hdr));
+ data = buf + sizeof(hdr);
+ msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
+ msg->type = VFIO_MSG_REQ;
+ }
+
+ msgleft = hdr.size - sizeof(hdr);
+ while (msgleft > 0) {
+ ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+
+ /* error or would block */
+ if (ret < 0) {
+ goto fatal;
+ }
+
+ msgleft -= ret;
+ data += ret;
+ }
+
+ /*
+ * Replies signal a waiter, if none just check for errors
+ * and free the message buffer.
+ *
+ * Requests get queued for the BH.
+ */
+ if (isreply) {
+ msg->complete = true;
+ if (msg->type == VFIO_MSG_WAIT) {
+ qemu_cond_signal(&msg->cv);
+ } else {
+ if (hdr.flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_rcv error reply on async request ");
+ error_printf("command %x error %s\n", hdr.command,
+ strerror(hdr.error_reply));
+ }
+ /* youngest nowait msg has been ack'd */
+ if (proxy->last_nowait == msg) {
+ proxy->last_nowait = NULL;
+ }
+ vfio_user_recycle(proxy, msg);
+ }
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
+ qemu_bh_schedule(proxy->req_bh);
+ }
+ return 0;
+
+ /*
+ * fatal means the other side closed or we don't trust the stream
+ * err means this message is corrupt
+ */
+fatal:
+ vfio_user_shutdown(proxy);
+ proxy->state = VFIO_PROXY_ERROR;
+
+err:
+ for (i = 0; i < numfds; i++) {
+ close(fdp[i]);
+ }
+ if (isreply && msg != NULL) {
+ /* force an error to keep sending thread from hanging */
+ vfio_user_set_error(msg->hdr, EINVAL);
+ msg->complete = true;
+ qemu_cond_signal(&msg->cv);
+ }
+ error_report_err(local_err);
+ return -1;
+}
+
static void vfio_user_cb(void *opaque)
{
VFIOProxy *proxy = opaque;
@@ -59,6 +326,51 @@ static void vfio_user_cb(void *opaque)
* Functions called by main or CPU threads
*/
+/*
+ * Process incoming requests.
+ *
+ * The bus-specific callback has the form:
+ * request(opaque, msg)
+ * where 'opaque' was specified in vfio_user_set_handler
+ * and 'msg' is the inbound message.
+ *
+ * The callback is responsible for disposing of the message buffer,
+ * usually by re-using it when calling vfio_send_reply or vfio_send_error,
+ * both of which free their message buffer when the reply is sent.
+ *
+ * If the callback uses a new buffer, it needs to free the old one.
+ */
+static void vfio_user_request(void *opaque)
+{
+ VFIOProxy *proxy = opaque;
+ VFIOUserMsgQ new, free;
+ VFIOUserMsg *msg, *m1;
+
+ /* reap all incoming */
+ QTAILQ_INIT(&new);
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
+ QTAILQ_REMOVE(&proxy->pending, msg, next);
+ QTAILQ_INSERT_TAIL(&new, msg, next);
+ }
+ }
+
+ /* process list */
+ QTAILQ_INIT(&free);
+ QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
+ QTAILQ_REMOVE(&new, msg, next);
+ proxy->request(proxy->req_arg, msg);
+ QTAILQ_INSERT_HEAD(&free, msg, next);
+ }
+
+ /* free list */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
+ vfio_user_recycle(proxy, msg);
+ }
+ }
+}
+
static QLIST_HEAD(, VFIOProxy) vfio_user_sockets =
QLIST_HEAD_INITIALIZER(vfio_user_sockets);
@@ -97,6 +409,7 @@ VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
}
proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
+ proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);
QTAILQ_INIT(&proxy->outgoing);
QTAILQ_INIT(&proxy->incoming);
@@ -107,6 +420,18 @@ VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
return proxy;
}
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *req_arg)
+{
+ VFIOProxy *proxy = vbasedev->proxy;
+
+ proxy->request = handler;
+ proxy->req_arg = req_arg;
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, NULL, proxy);
+}
+
void vfio_user_disconnect(VFIOProxy *proxy)
{
VFIOUserMsg *r1, *r2;
@@ -122,6 +447,8 @@ void vfio_user_disconnect(VFIOProxy *proxy)
}
object_unref(OBJECT(proxy->ioc));
proxy->ioc = NULL;
+ qemu_bh_delete(proxy->req_bh);
+ proxy->req_bh = NULL;
proxy->state = VFIO_PROXY_CLOSING;
QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
diff --git a/MAINTAINERS b/MAINTAINERS
index cfaccbf..bc0ba88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1909,6 +1909,7 @@ S: Supported
F: docs/devel/vfio-user.rst
F: hw/vfio/user.c
F: hw/vfio/user.h
+F: hw/vfio/user-protocol.h
vhost
M: Michael S. Tsirkin <mst@redhat.com>
--
1.8.3.1
next prev parent reply other threads:[~2022-01-12 1:12 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-12 0:43 [RFC v4 00/21] vfio-user client John Johnson
2022-01-12 0:43 ` [RFC v4 01/21] vfio-user: introduce vfio-user protocol specification John Johnson
2022-02-14 13:10 ` Thanos Makatos
2022-03-09 22:34 ` Alex Williamson
2022-03-10 10:20 ` John Levon
2022-03-14 6:04 ` John Johnson
2022-03-15 21:43 ` Thanos Makatos
2022-03-15 22:28 ` Alex Williamson
2022-07-22 6:23 ` John Johnson
2022-01-12 0:43 ` [RFC v4 02/21] vfio-user: add VFIO base abstract class John Johnson
2022-01-12 0:43 ` [RFC v4 03/21] vfio-user: add container IO ops vector John Johnson
2022-01-12 0:43 ` [RFC v4 04/21] vfio-user: add region cache John Johnson
2022-03-09 23:40 ` Alex Williamson
2022-01-12 0:43 ` [RFC v4 05/21] vfio-user: add device IO ops vector John Johnson
2022-01-12 0:43 ` [RFC v4 06/21] vfio-user: Define type vfio_user_pci_dev_info John Johnson
2022-01-12 0:43 ` [RFC v4 07/21] vfio-user: connect vfio proxy to remote server John Johnson
2022-01-12 0:43 ` John Johnson [this message]
2022-02-03 21:53 ` [RFC v4 08/21] vfio-user: define socket receive functions Thanos Makatos
2022-02-04 12:42 ` Thanos Makatos
2022-02-07 7:07 ` John Johnson
2022-02-15 13:35 ` Thanos Makatos
2022-02-15 14:50 ` Thanos Makatos
2022-02-16 2:09 ` John Johnson
2022-02-16 9:31 ` Thanos Makatos
2022-01-12 0:43 ` [RFC v4 09/21] vfio-user: define socket send functions John Johnson
2022-01-26 10:17 ` Thanos Makatos
2022-02-07 7:09 ` John Johnson
2022-01-12 0:43 ` [RFC v4 10/21] vfio-user: get device info John Johnson
2022-01-12 0:43 ` [RFC v4 11/21] vfio-user: get region info John Johnson
2022-01-12 0:43 ` [RFC v4 12/21] vfio-user: region read/write John Johnson
2022-01-26 21:57 ` Thanos Makatos
2022-01-12 0:43 ` [RFC v4 13/21] vfio-user: pci_user_realize PCI setup John Johnson
2022-01-12 0:43 ` [RFC v4 14/21] vfio-user: get and set IRQs John Johnson
2022-01-12 0:43 ` [RFC v4 15/21] vfio-user: proxy container connect/disconnect John Johnson
2022-01-12 0:43 ` [RFC v4 16/21] vfio-user: dma map/unmap operations John Johnson
2022-01-12 0:43 ` [RFC v4 17/21] vfio-user: secure DMA support John Johnson
2022-01-12 0:43 ` [RFC v4 18/21] vfio-user: dma read/write operations John Johnson
2022-01-12 0:43 ` [RFC v4 19/21] vfio-user: pci reset John Johnson
2022-01-12 0:43 ` [RFC v4 20/21] vfio-user: migration support John Johnson
2022-02-11 13:31 ` Thanos Makatos
2022-02-14 18:50 ` John Johnson
2022-02-15 14:53 ` Thanos Makatos
2022-01-12 0:43 ` [RFC v4 21/21] Only set qemu file error if saving state so the file exists John Johnson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a89cfd3195740dfb313d1947c0c7de583e4d0f46.1641584317.git.john.g.johnson@oracle.com \
--to=john.g.johnson@oracle.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).