From: John Levon <john.levon@nutanix.com>
To: qemu-devel@nongnu.org
Cc: "Marc-André Lureau" <marcandre.lureau@redhat.com>,
"Thanos Makatos" <thanos.makatos@nutanix.com>,
"Daniel P. Berrangé" <berrange@redhat.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Peter Xu" <peterx@redhat.com>,
"David Hildenbrand" <david@redhat.com>,
"Cédric Le Goater" <clg@redhat.com>,
"Stefano Garzarella" <sgarzare@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Alex Williamson" <alex.williamson@redhat.com>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"John Levon" <john.levon@nutanix.com>,
"John Johnson" <john.g.johnson@oracle.com>,
"Elena Ufimtseva" <elena.ufimtseva@oracle.com>,
"Jagannathan Raman" <jag.raman@oracle.com>
Subject: [PATCH 26/27] vfio-user: add coalesced posted writes
Date: Thu, 15 May 2025 16:44:11 +0100 [thread overview]
Message-ID: <20250515154413.210315-27-john.levon@nutanix.com> (raw)
In-Reply-To: <20250515154413.210315-1-john.levon@nutanix.com>
Add a new message, VFIO_USER_REGION_WRITE_MULTI, that sends multiple
writes to the server in a single message. This prevents the outgoing
queue from overflowing when a long-latency operation is followed by a
series of posted writes.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio-user/protocol.h | 21 ++++++++++
hw/vfio-user/proxy.h | 12 ++++++
hw/vfio-user/device.c | 40 +++++++++++++++++++
hw/vfio-user/proxy.c | 84 +++++++++++++++++++++++++++++++++++++++
hw/vfio-user/trace-events | 1 +
5 files changed, 158 insertions(+)
diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
index 8f589faef4..f747e70e50 100644
--- a/hw/vfio-user/protocol.h
+++ b/hw/vfio-user/protocol.h
@@ -42,6 +42,7 @@ enum vfio_user_command {
VFIO_USER_DMA_WRITE = 12,
VFIO_USER_DEVICE_RESET = 13,
VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_REGION_WRITE_MULTI = 15,
VFIO_USER_MAX,
};
@@ -75,6 +76,7 @@ typedef struct {
#define VFIO_USER_CAP_PGSIZES "pgsizes"
#define VFIO_USER_CAP_MAP_MAX "max_dma_maps"
#define VFIO_USER_CAP_MIGR "migration"
+#define VFIO_USER_CAP_MULTI "write_multiple"
/* "migration" members */
#define VFIO_USER_CAP_PGSIZE "pgsize"
@@ -221,4 +223,23 @@ typedef struct {
char data[];
} VFIOUserBitmap;
+/*
+ * VFIO_USER_REGION_WRITE_MULTI
+ *
+ * One message carrying up to VFIO_USER_MULTI_MAX region writes, each
+ * with at most VFIO_USER_MULTI_DATA bytes of data.
+ */
+#define VFIO_USER_MULTI_DATA 8 /* max data bytes per coalesced write */
+#define VFIO_USER_MULTI_MAX 200 /* max writes in one message */
+
+typedef struct {
+ uint64_t offset; /* offset of the write */
+ uint32_t region; /* region index */
+ uint32_t count; /* number of bytes of data[] in use */
+ char data[VFIO_USER_MULTI_DATA];
+} VFIOUserWROne;
+
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t wr_cnt; /* number of entries of wrs[] in use */
+ VFIOUserWROne wrs[VFIO_USER_MULTI_MAX];
+} VFIOUserWRMulti;
+
#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
index 22ed66c54f..ae09b9cc60 100644
--- a/hw/vfio-user/proxy.h
+++ b/hw/vfio-user/proxy.h
@@ -88,6 +88,8 @@ typedef struct VFIOUserProxy {
VFIOUserMsg *last_nowait;
VFIOUserMsg *part_recv;
size_t recv_left;
+ VFIOUserWRMulti *wr_multi;
+ int num_outgoing;
enum proxy_state state;
} VFIOUserProxy;
@@ -95,6 +97,11 @@ typedef struct VFIOUserProxy {
#define VFIO_PROXY_CLIENT 0x1
#define VFIO_PROXY_FORCE_QUEUED 0x4
#define VFIO_PROXY_NO_POST 0x8
+#define VFIO_PROXY_USE_MULTI 0x10 /* set by check_multi() if "write_multiple" is true */
+
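+/*
+ * Coalescing high and low water marks for VFIOUserProxy num_outgoing:
+ * posted writes start being coalesced above HIGH, and the coalesced
+ * message is flushed once the queue is below LOW.
+ */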
+#define VFIO_USER_OUT_HIGH 1024
+#define VFIO_USER_OUT_LOW 128
typedef struct VFIODevice VFIODevice;
@@ -122,4 +129,9 @@ void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
+void vfio_user_flush_multi(VFIOUserProxy *proxy);
+void vfio_user_create_multi(VFIOUserProxy *proxy);
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data);
+
#endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c
index eb2194c0eb..79375ddc96 100644
--- a/hw/vfio-user/device.c
+++ b/hw/vfio-user/device.c
@@ -10,6 +10,8 @@
*/
#include "qemu/osdep.h"
+#include "qemu/lockable.h"
+#include "qemu/thread.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
@@ -296,6 +298,7 @@ static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
VFIOUserRegionRW *msgp = NULL;
VFIOUserProxy *proxy = vbasedev->proxy;
int size = sizeof(*msgp) + count;
+ bool can_multi;
int flags = 0;
int ret;
@@ -311,6 +314,43 @@ static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
flags |= VFIO_USER_NO_REPLY;
}
+ /* write eligible to be in a WRITE_MULTI msg ? */
+ can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
+ count <= VFIO_USER_MULTI_DATA;
+
+ /*
+ * This should be a rare case, so first check without the lock;
+ * if we're wrong, vfio_user_send_queued() will flush any posted
+ * writes we missed here.
+ */
+ if (proxy->wr_multi != NULL ||
+ (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
+
+ /*
+ * re-check with lock
+ *
+ * if already building a WRITE_MULTI msg,
+ * add this one if possible else flush pending before
+ * sending the current one
+ *
+ * else if outgoing queue is over the highwater,
+ * start a new WRITE_MULTI message
+ */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ if (proxy->wr_multi != NULL) {
+ if (can_multi) {
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ vfio_user_flush_multi(proxy);
+ } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
+ vfio_user_create_multi(proxy);
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ }
+ }
+
msgp = g_malloc0(size);
vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
msgp->offset = off;
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
index 13f2407845..dbaa322952 100644
--- a/hw/vfio-user/proxy.c
+++ b/hw/vfio-user/proxy.c
@@ -16,12 +16,14 @@
#include "hw/vfio-user/proxy.h"
#include "hw/vfio-user/trace.h"
#include "qapi/error.h"
+#include "qobject/qbool.h"
#include "qobject/qdict.h"
#include "qobject/qjson.h"
#include "qobject/qnum.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
+#include "qemu/thread.h"
#include "system/iothread.h"
static IOThread *vfio_user_iothread;
@@ -444,6 +446,11 @@ static void vfio_user_send(void *opaque)
}
qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
vfio_user_recv, NULL, NULL, proxy);
+
+ /* queue empty - send any pending multi write msgs */
+ if (proxy->wr_multi != NULL) {
+ vfio_user_flush_multi(proxy);
+ }
}
}
@@ -464,6 +471,7 @@ static int vfio_user_send_one(VFIOUserProxy *proxy)
}
QTAILQ_REMOVE(&proxy->outgoing, msg, next);
+ proxy->num_outgoing--;
if (msg->type == VFIO_MSG_ASYNC) {
vfio_user_recycle(proxy, msg);
} else {
@@ -571,11 +579,18 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg)
{
int ret;
+ /* older coalesced writes go first */
+ if (proxy->wr_multi != NULL &&
+ ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) {
+ vfio_user_flush_multi(proxy);
+ }
+
/*
* Unsent outgoing msgs - add to tail
*/
if (!QTAILQ_EMPTY(&proxy->outgoing)) {
QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
+ proxy->num_outgoing++;
return 0;
}
@@ -589,6 +604,7 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg)
}
if (ret == QIO_CHANNEL_ERR_BLOCK) {
QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
+ proxy->num_outgoing = 1;
qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
vfio_user_recv, proxy->ctx,
vfio_user_send, proxy);
@@ -1112,12 +1128,27 @@ static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
return caps_parse(proxy, qdict, caps_migr, errp);
}
+/*
+ * Capability parser for VFIO_USER_CAP_MULTI.
+ *
+ * The value must be a QBool. When it is true, VFIO_PROXY_USE_MULTI is
+ * set in proxy->flags. Returns true if the value was a QBool (whatever
+ * its value); otherwise sets errp and returns false.
+ */
+static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+    QBool *enabled = qobject_to(QBool, qobj);
+
+    if (enabled != NULL) {
+        if (qbool_get_bool(enabled)) {
+            proxy->flags |= VFIO_PROXY_USE_MULTI;
+        }
+        return true;
+    }
+
+    error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI);
+    return false;
+}
+
static struct cap_entry caps_cap[] = {
{ VFIO_USER_CAP_MAX_FDS, check_max_fds },
{ VFIO_USER_CAP_MAX_XFER, check_max_xfer },
{ VFIO_USER_CAP_PGSIZES, check_pgsizes },
{ VFIO_USER_CAP_MAP_MAX, check_max_dma },
{ VFIO_USER_CAP_MIGR, check_migr },
+ { VFIO_USER_CAP_MULTI, check_multi },
{ NULL }
};
@@ -1176,6 +1207,7 @@ static GString *caps_json(void)
qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
+ qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true);
qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));
@@ -1228,3 +1260,55 @@ bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
return true;
}
+
+/*
+ * Hand the WRITE_MULTI message held in proxy->wr_multi to
+ * vfio_user_send_queued() as an async message.
+ *
+ * proxy->wr_multi must be non-NULL on entry and is NULL on return.
+ * The header size is reduced by the space of the unused wrs[] entries
+ * before the message is queued.
+ */
+void vfio_user_flush_multi(VFIOUserProxy *proxy)
+{
+    VFIOUserWRMulti *pending = proxy->wr_multi;
+    uint64_t unused = VFIO_USER_MULTI_MAX - pending->wr_cnt;
+    VFIOUserMsg *msg;
+
+    /*
+     * Detach first: vfio_user_send_queued() itself flushes a non-NULL
+     * wr_multi ahead of requests.
+     */
+    proxy->wr_multi = NULL;
+
+    /* drop the unused tail of wrs[] from the message size */
+    pending->hdr.size -= unused * sizeof(VFIOUserWROne);
+
+    msg = vfio_user_getmsg(proxy, &pending->hdr, NULL);
+    msg->id = pending->hdr.id;
+    msg->rsize = 0;
+    msg->type = VFIO_MSG_ASYNC;
+    trace_vfio_user_wrmulti("flush", pending->wr_cnt);
+
+    if (vfio_user_send_queued(proxy, msg) < 0) {
+        vfio_user_recycle(proxy, msg);
+    }
+}
+
+/*
+ * Allocate a zeroed WRITE_MULTI message and install it as
+ * proxy->wr_multi.
+ *
+ * The header is set up for the full structure size with
+ * VFIO_USER_NO_REPLY.
+ */
+void vfio_user_create_multi(VFIOUserProxy *proxy)
+{
+    VFIOUserWRMulti *fresh = g_new0(VFIOUserWRMulti, 1);
+
+    vfio_user_request_msg(&fresh->hdr, VFIO_USER_REGION_WRITE_MULTI,
+                          sizeof(*fresh), VFIO_USER_NO_REPLY);
+    proxy->wr_multi = fresh;
+}
+
+/*
+ * Append one write to the WRITE_MULTI message in proxy->wr_multi.
+ *
+ * @index: region the write targets
+ * @offset: offset of the write
+ * @count: bytes to copy from @data; at most VFIO_USER_MULTI_DATA
+ * @data: source buffer
+ *
+ * proxy->wr_multi must be non-NULL. The message is flushed straight
+ * away once it holds VFIO_USER_MULTI_MAX writes or when num_outgoing
+ * is below VFIO_USER_OUT_LOW.
+ */
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+                         off_t offset, uint32_t count, void *data)
+{
+    VFIOUserWRMulti *wm = proxy->wr_multi;
+    VFIOUserWROne *w1;
+
+    /* data[] is fixed-size: a larger count would overrun the entry */
+    g_assert(count <= VFIO_USER_MULTI_DATA);
+    /* a full message is flushed below, so a free slot must exist */
+    g_assert(wm->wr_cnt < VFIO_USER_MULTI_MAX);
+
+    w1 = &wm->wrs[wm->wr_cnt];
+    w1->offset = offset;
+    w1->region = index;
+    w1->count = count;
+    memcpy(w1->data, data, count);
+
+    wm->wr_cnt++;
+    trace_vfio_user_wrmulti("add", wm->wr_cnt);
+    if (wm->wr_cnt == VFIO_USER_MULTI_MAX ||
+        proxy->num_outgoing < VFIO_USER_OUT_LOW) {
+        vfio_user_flush_multi(proxy);
+    }
+}
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
index 7ef98813b3..64fac9137f 100644
--- a/hw/vfio-user/trace-events
+++ b/hw/vfio-user/trace-events
@@ -11,6 +11,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index
vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d"
vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d"
vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x"
+vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64
# container.c
vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
--
2.43.0
next prev parent reply other threads:[~2025-05-15 15:49 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-15 15:43 [PATCH 00/27] vfio-user client John Levon
2025-05-15 15:43 ` [PATCH 01/27] vfio: return mr from vfio_get_xlat_addr John Levon
2025-05-15 15:46 ` John Levon
2025-05-15 15:43 ` [PATCH 02/27] vfio/container: pass MemoryRegion to DMA operations John Levon
2025-05-16 15:11 ` Cédric Le Goater
2025-05-18 17:00 ` John Levon
2025-05-19 8:17 ` Cédric Le Goater
2025-05-15 15:43 ` [PATCH 03/27] vfio: move more cleanup into vfio_pci_put_device() John Levon
2025-05-16 15:21 ` Cédric Le Goater
2025-05-15 15:43 ` [PATCH 04/27] vfio: move config space read into vfio_pci_config_setup() John Levon
2025-05-16 15:26 ` Cédric Le Goater
2025-05-15 15:43 ` [PATCH 05/27] vfio: refactor out IRQ signalling setup John Levon
2025-05-16 15:27 ` Cédric Le Goater
2025-05-15 15:43 ` [PATCH 06/27] vfio: enable per-IRQ MSI-X masking John Levon
2025-05-15 15:43 ` [PATCH 07/27] vfio: add per-region fd support John Levon
2025-05-15 15:43 ` [PATCH 08/27] vfio: mark posted writes in region write callbacks John Levon
2025-05-15 15:43 ` [PATCH 09/27] vfio-user: introduce vfio-user protocol specification John Levon
2025-05-15 15:43 ` [PATCH 10/27] vfio-user: add vfio-user class and container John Levon
2025-05-15 15:43 ` [PATCH 11/27] vfio-user: connect vfio proxy to remote server John Levon
2025-05-15 15:43 ` [PATCH 12/27] vfio-user: implement message receive infrastructure John Levon
2025-05-15 15:43 ` [PATCH 13/27] vfio-user: implement message send infrastructure John Levon
2025-05-15 15:43 ` [PATCH 14/27] vfio-user: implement VFIO_USER_DEVICE_GET_INFO John Levon
2025-05-15 15:44 ` [PATCH 15/27] vfio-user: implement VFIO_USER_DEVICE_GET_REGION_INFO John Levon
2025-05-15 15:44 ` [PATCH 16/27] vfio-user: implement VFIO_USER_REGION_READ/WRITE John Levon
2025-05-15 15:44 ` [PATCH 17/27] vfio-user: set up PCI in vfio_user_pci_realize() John Levon
2025-05-15 15:44 ` [PATCH 18/27] vfio-user: implement VFIO_USER_DEVICE_GET/SET_IRQ* John Levon
2025-05-15 15:44 ` [PATCH 19/27] vfio-user: forward MSI-X PBA BAR accesses to server John Levon
2025-05-15 15:44 ` [PATCH 20/27] vfio-user: set up container access to the proxy John Levon
2025-05-15 15:44 ` [PATCH 21/27] vfio-user: implement VFIO_USER_DEVICE_RESET John Levon
2025-05-15 15:44 ` [PATCH 22/27] vfio-user: implement VFIO_USER_DMA_MAP/UNMAP John Levon
2025-05-15 15:44 ` [PATCH 23/27] vfio-user: implement VFIO_USER_DMA_READ/WRITE John Levon
2025-05-15 15:44 ` [PATCH 24/27] vfio-user: add 'x-msg-timeout' option John Levon
2025-05-15 15:44 ` [PATCH 25/27] vfio-user: support posted writes John Levon
2025-05-15 15:44 ` John Levon [this message]
2025-05-15 15:44 ` [PATCH 27/27] docs: add vfio-user documentation John Levon
2025-05-19 12:40 ` [PATCH 00/27] vfio-user client Cédric Le Goater
2025-05-19 13:29 ` John Levon
2025-05-20 5:59 ` Cédric Le Goater
2025-05-20 15:05 ` John Levon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250515154413.210315-27-john.levon@nutanix.com \
--to=john.levon@nutanix.com \
--cc=alex.williamson@redhat.com \
--cc=berrange@redhat.com \
--cc=clg@redhat.com \
--cc=david@redhat.com \
--cc=elena.ufimtseva@oracle.com \
--cc=jag.raman@oracle.com \
--cc=john.g.johnson@oracle.com \
--cc=marcandre.lureau@redhat.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=sgarzare@redhat.com \
--cc=thanos.makatos@nutanix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).