From: "Cédric Le Goater" <clg@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
	"John Levon" <john.levon@nutanix.com>,
	"John Johnson" <john.g.johnson@oracle.com>,
	"Elena Ufimtseva" <elena.ufimtseva@oracle.com>,
	"Jagannathan Raman" <jag.raman@oracle.com>,
	"Cédric Le Goater" <clg@redhat.com>
Subject: [PULL 20/25] vfio-user: implement VFIO_USER_DMA_READ/WRITE
Date: Thu, 26 Jun 2025 09:45:24 +0200
Message-ID: <20250626074529.1384114-21-clg@redhat.com>
In-Reply-To: <20250626074529.1384114-1-clg@redhat.com>

From: John Levon <john.levon@nutanix.com>

Unlike most other messages, these are server->client messages, sent when
the server wants to access guest memory ("DMA") through the client; this
path is slow, so normally the server has the memory directly mapped
instead.
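
For context, the exchange from the server's point of view looks roughly
like the sketch below, using the VFIOUserDMARW layout added by this
patch. The transport helpers, command number and flag value are
illustrative placeholders (check protocol.h and the vfio-user spec),
not the libvfio-user API:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define DMA_READ_CMD  11    /* assumed VFIO_USER_DMA_READ value */
    #define ERROR_FLAG    0x20  /* assumed VFIO_USER_ERROR flag bit */

    /* mirrors VFIOUserHdr / VFIOUserDMARW from hw/vfio-user/protocol.h */
    typedef struct {
        uint16_t id;
        uint16_t command;
        uint32_t size;          /* total message size, header included */
        uint32_t flags;         /* request/reply, no-reply, error bits */
        uint32_t error_reply;   /* errno carried by error replies */
    } hdr_t;

    typedef struct {
        hdr_t    hdr;
        uint64_t offset;        /* guest address to access */
        uint32_t count;         /* bytes to transfer */
        char     data[];        /* reply payload for reads */
    } dma_rw_t;

    /* hypothetical transport helpers, assumed for this sketch */
    int xmit(const void *buf, size_t len);
    int recv_msg(void *buf, size_t len);

    /* server side: ask the client to read count bytes of guest memory */
    static int server_dma_read(uint64_t gpa, void *buf, uint32_t count)
    {
        dma_rw_t req = {
            .hdr    = { .id = 1, .command = DMA_READ_CMD,
                        .size = sizeof(req) },  /* request has no data[] */
            .offset = gpa,
            .count  = count,
        };
        dma_rw_t *rep = malloc(sizeof(*rep) + count);

        if (rep == NULL || xmit(&req, sizeof(req)) < 0 ||
            recv_msg(rep, sizeof(*rep) + count) < 0) {
            free(rep);
            return -1;
        }
        if (rep->hdr.flags & ERROR_FLAG) {
            int err = rep->hdr.error_reply;
            free(rep);
            return -err;
        }
        memcpy(buf, rep->data, count);  /* data follows the reply header */
        free(rep);
        return 0;
    }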

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-15-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 hw/vfio-user/protocol.h |  13 ++++-
 hw/vfio-user/proxy.h    |   3 ++
 hw/vfio-user/pci.c      | 111 ++++++++++++++++++++++++++++++++++++++++
 hw/vfio-user/proxy.c    |  97 +++++++++++++++++++++++++++++++++++
 4 files changed, 223 insertions(+), 1 deletion(-)
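
Note on the size checks in the two handlers below (annotation, not part
of the patch): for a write the data arrives inline with the request, so
the count must fit in what was actually received, while for a read the
data goes out with the reply that the client allocates itself, so the
count is instead bounded by the negotiated max_xfer_size:

    /* DMA write request: payload is inline, must fit the received message */
    ok_write   = hdr.size >= sizeof(VFIOUserDMARW) &&
                 count <= hdr.size - sizeof(VFIOUserDMARW);

    /* DMA read request: payload is allocated for the reply */
    reply_size = sizeof(VFIOUserDMARW) + count;   /* count <= max_xfer_size */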

diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
index 524f3d633ab8800ad0934ffad861c7b86a8e70b3..3e9d8e576bae6a51d8fbc7cf26bb35d29e0e1aee 100644
--- a/hw/vfio-user/protocol.h
+++ b/hw/vfio-user/protocol.h
@@ -200,7 +200,18 @@ typedef struct {
     char data[];
 } VFIOUserRegionRW;
 
-/*imported from struct vfio_bitmap */
+/*
+ * VFIO_USER_DMA_READ
+ * VFIO_USER_DMA_WRITE
+ */
+typedef struct {
+    VFIOUserHdr hdr;
+    uint64_t offset;
+    uint32_t count;
+    char data[];
+} VFIOUserDMARW;
+
+/* imported from struct vfio_bitmap */
 typedef struct {
     uint64_t pgsize;
     uint64_t size;
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
index e2fc83ca3b12aa891d6843345c7c5015e8811aa5..39092c08c8d31ad315e1942ddd09134a8a4569a0 100644
--- a/hw/vfio-user/proxy.h
+++ b/hw/vfio-user/proxy.h
@@ -101,6 +101,7 @@ void vfio_user_set_handler(VFIODevice *vbasedev,
 bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
 
 VFIOUserFDs *vfio_user_getfds(int numfds);
+void vfio_user_putfds(VFIOUserMsg *msg);
 
 void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                            uint32_t size, uint32_t flags);
@@ -109,5 +110,7 @@ bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                          VFIOUserFDs *fds, int rsize, Error **errp);
 bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                            VFIOUserFDs *fds, int rsize, Error **errp);
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
 
 #endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
index 49d12763aba73f780ec5b202dfd4d3dc98837491..040660d1977cf97d9c983985d30d9488e391e0c0 100644
--- a/hw/vfio-user/pci.c
+++ b/hw/vfio-user/pci.c
@@ -9,6 +9,7 @@
 #include <sys/ioctl.h>
 #include "qemu/osdep.h"
 #include "qapi-visit-sockets.h"
+#include "qemu/error-report.h"
 
 #include "hw/qdev-properties.h"
 #include "hw/vfio/pci.h"
@@ -80,6 +81,95 @@ static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
     vdev->msix->pba_region = NULL;
 }
 
+static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+    VFIOUserDMARW *res;
+    MemTxResult r;
+    size_t size;
+
+    if (msg->hdr.size < sizeof(*msg)) {
+        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+        return;
+    }
+    if (msg->count > proxy->max_xfer_size) {
+        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+        return;
+    }
+
+    /* switch to our own message buffer */
+    size = msg->count + sizeof(VFIOUserDMARW);
+    res = g_malloc0(size);
+    memcpy(res, msg, sizeof(*res));
+    g_free(msg);
+
+    r = pci_dma_read(pdev, res->offset, &res->data, res->count);
+
+    switch (r) {
+    case MEMTX_OK:
+        if (res->hdr.flags & VFIO_USER_NO_REPLY) {
+            g_free(res);
+            return;
+        }
+        vfio_user_send_reply(proxy, &res->hdr, size);
+        break;
+    case MEMTX_ERROR:
+        vfio_user_send_error(proxy, &res->hdr, EFAULT);
+        break;
+    case MEMTX_DECODE_ERROR:
+        vfio_user_send_error(proxy, &res->hdr, ENODEV);
+        break;
+    case MEMTX_ACCESS_ERROR:
+        vfio_user_send_error(proxy, &res->hdr, EPERM);
+        break;
+    default:
+        error_printf("vfio_user_dma_read unknown error %d\n", r);
+        vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL);
+    }
+}
+
+static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+    MemTxResult r;
+
+    if (msg->hdr.size < sizeof(*msg)) {
+        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+        return;
+    }
+    /* make sure transfer count isn't larger than the message data */
+    if (msg->count > msg->hdr.size - sizeof(*msg)) {
+        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+        return;
+    }
+
+    r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count);
+
+    switch (r) {
+    case MEMTX_OK:
+        if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) {
+            vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr));
+        } else {
+            g_free(msg);
+        }
+        break;
+    case MEMTX_ERROR:
+        vfio_user_send_error(proxy, &msg->hdr, EFAULT);
+        break;
+    case MEMTX_DECODE_ERROR:
+        vfio_user_send_error(proxy, &msg->hdr, ENODEV);
+        break;
+    case MEMTX_ACCESS_ERROR:
+        vfio_user_send_error(proxy, &msg->hdr, EPERM);
+        break;
+    default:
+        error_printf("vfio_user_dma_write unknown error %d\n", r);
+        vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL);
+    }
+}
+
 /*
  * Incoming request message callback.
  *
@@ -87,7 +177,28 @@ static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
  */
 static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
 {
+    VFIOPCIDevice *vdev = opaque;
+    VFIOUserHdr *hdr = msg->hdr;
+
+    /* no incoming PCI requests pass FDs */
+    if (msg->fds != NULL) {
+        vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL);
+        vfio_user_putfds(msg);
+        return;
+    }
 
+    switch (hdr->command) {
+    case VFIO_USER_DMA_READ:
+        vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr);
+        break;
+    case VFIO_USER_DMA_WRITE:
+        vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr);
+        break;
+    default:
+        error_printf("vfio_user_pci_process_req unknown cmd %d\n",
+                     hdr->command);
+        vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS);
+    }
 }
 
 /*
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
index c8ae8a59b441a1151c884dfb8625fde696af664c..cb93d9a660605f6675a141a1645f2f3391e4aa8a 100644
--- a/hw/vfio-user/proxy.c
+++ b/hw/vfio-user/proxy.c
@@ -347,6 +347,10 @@ static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
         *msg->hdr = hdr;
         data = (char *)msg->hdr + sizeof(hdr);
     } else {
+        if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
+            error_setg(errp, "vfio_user_recv request larger than max");
+            goto err;
+        }
         buf = g_malloc0(hdr.size);
         memcpy(buf, &hdr, sizeof(hdr));
         data = buf + sizeof(hdr);
@@ -702,6 +706,40 @@ bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
     return ok;
 }
 
+/*
+ * async send - msg can be queued, but will be freed when sent
+ *
+ * Returns false on failure, in which case @errp will be populated.
+ *
+ * In either case, ownership of @hdr and @fds is taken, and the caller must
+ * *not* free them itself.
+ */
+static bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                                 VFIOUserFDs *fds, Error **errp)
+{
+    VFIOUserMsg *msg;
+
+    QEMU_LOCK_GUARD(&proxy->lock);
+
+    msg = vfio_user_getmsg(proxy, hdr, fds);
+    msg->id = hdr->id;
+    msg->rsize = 0;
+    msg->type = VFIO_MSG_ASYNC;
+
+    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+        error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
+        vfio_user_recycle(proxy, msg);
+        return false;
+    }
+
+    if (!vfio_user_send_queued(proxy, msg, errp)) {
+        vfio_user_recycle(proxy, msg);
+        return false;
+    }
+
+    return true;
+}
+
 void vfio_user_wait_reqs(VFIOUserProxy *proxy)
 {
     VFIOUserMsg *msg;
@@ -746,6 +784,65 @@ void vfio_user_wait_reqs(VFIOUserProxy *proxy)
     qemu_mutex_unlock(&proxy->lock);
 }
 
+/*
+ * Reply to an incoming request.
+ */
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
+{
+    Error *local_err = NULL;
+
+    if (size < sizeof(VFIOUserHdr)) {
+        error_printf("%s: size too small", __func__);
+        g_free(hdr);
+        return;
+    }
+
+    /*
+     * convert header to associated reply
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    hdr->size = size;
+
+    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+        error_report_err(local_err);
+    }
+}
+
+/*
+ * Send an error reply to an incoming request.
+ */
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
+{
+    Error *local_err = NULL;
+
+    /*
+     * convert header to associated reply
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    hdr->flags |= VFIO_USER_ERROR;
+    hdr->error_reply = error;
+    hdr->size = sizeof(*hdr);
+
+    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+        error_report_err(local_err);
+    }
+}
+
+/*
+ * Close FDs erroneously received in an incoming request.
+ */
+void vfio_user_putfds(VFIOUserMsg *msg)
+{
+    VFIOUserFDs *fds = msg->fds;
+    int i;
+
+    for (i = 0; i < fds->recv_fds; i++) {
+        close(fds->fds[i]);
+    }
+    g_free(fds);
+    msg->fds = NULL;
+}
+
 static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
     QLIST_HEAD_INITIALIZER(vfio_user_sockets);
 
-- 
2.49.0



