From: "Eugenio Pérez" <eperezma@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Laurent Vivier" <lvivier@redhat.com>,
"Jason Wang" <jasowang@redhat.com>,
"Dragos Tatulea DE" <dtatulea@nvidia.com>,
"Jonah Palmer" <jonah.palmer@oracle.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Eugenio Pérez" <eperezma@redhat.com>,
"Lei Yang" <leiyang@redhat.com>,
"Koushik Dutta" <kdutta@redhat.com>,
"Stefano Garzarella" <sgarzare@redhat.com>,
qemu-stable@nongnu.org, "Cindy Lu" <lulu@redhat.com>,
"Maxime Coquelin" <mcoqueli@redhat.com>
Subject: [PATCH 6/7] vhost: add in_order feature to shadow virtqueue
Date: Wed, 4 Mar 2026 18:35:34 +0100 [thread overview]
Message-ID: <20260304173535.2702587-7-eperezma@redhat.com> (raw)
In-Reply-To: <20260304173535.2702587-1-eperezma@redhat.com>
Some vdpa devices benefit from the in order feature. Add support to SVQ
so QEMU can migrate these devices.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 137 +++++++++++++++++++++++++++--
hw/virtio/vhost-shadow-virtqueue.h | 36 ++++++--
2 files changed, 160 insertions(+), 13 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 2d8fc82cc06f..60212fcd7bf3 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -12,11 +12,14 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"
+#define VIRTIO_RING_NOT_IN_BATCH UINT16_MAX
+
/**
* Validate the transport device features that both guests can use with the SVQ
* and SVQs can use with the device.
@@ -150,7 +153,33 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
static uint16_t vhost_svq_next_desc(const VhostShadowVirtqueue *svq,
uint16_t id)
{
- return svq->desc_state[id].next;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ return (id == svq->vring.num) ? 0 : ++id;
+ } else {
+ return svq->desc_state[id].next;
+ }
+}
+
+/**
+ * Updates the SVQ free_head member after adding descriptors to the SVQ avail ring.
+ * The new free_head is the next descriptor that SVQ will make available by
+ * forwarding a new guest descriptor.
+ *
+ * @svq Shadow Virtqueue
+ * @num Number of descriptors added
+ * @id ID of the last descriptor added to the SVQ avail ring.
+ */
+static void vhost_svq_update_free_head(VhostShadowVirtqueue *svq,
+ size_t num, uint16_t id)
+{
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->free_head += num;
+ if (svq->free_head >= svq->vring.num) {
+ svq->free_head -= svq->vring.num;
+ }
+ } else {
+ svq->free_head = vhost_svq_next_desc(svq, id);
+ }
}
/**
@@ -202,7 +231,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
i = next;
}
- svq->free_head = vhost_svq_next_desc(svq, last);
+ vhost_svq_update_free_head(svq, num, last);
return true;
}
@@ -306,6 +335,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
svq->num_free -= ndescs;
svq->desc_state[qemu_head].elem = elem;
svq->desc_state[qemu_head].ndescs = ndescs;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->desc_state[qemu_head].in_bytes = iov_size(in_sg, in_num);
+ }
vhost_svq_kick(svq);
return 0;
}
@@ -401,6 +433,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
uint16_t *used_idx = &svq->vring.used->idx;
+
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER) &&
+ svq->batch_last.id != VIRTIO_RING_NOT_IN_BATCH) {
+ return true;
+ }
+
if (svq->last_used_idx != svq->shadow_used_idx) {
return true;
}
@@ -463,6 +501,47 @@ static uint16_t vhost_svq_get_last_used_split(VhostShadowVirtqueue *svq,
return le32_to_cpu(used->ring[last_used].id);
}
+/*
+ * Gets the next buffer id and moves forward the used idx, so the next call
+ * to this function returns the next one. IN_ORDER version.
+ *
+ * @svq: Shadow VirtQueue
+ * @len: Consumed length by the device.
+ *
+ * Return the id of the next descriptor consumed by the device, or -1 on error.
+ */
+static int32_t vhost_svq_get_last_used_split_in_order(
+ VhostShadowVirtqueue *svq,
+ uint32_t *len)
+{
+ unsigned num = svq->vring.num;
+ const vring_used_t *used = svq->vring.used;
+ uint16_t last_used = svq->last_used & (num - 1);
+ uint16_t last_used_idx = svq->last_used_idx & (num - 1);
+
+ if (svq->batch_last.id == VIRTIO_RING_NOT_IN_BATCH) {
+ svq->batch_last.id = le32_to_cpu(used->ring[last_used_idx].id);
+ svq->batch_last.len = le32_to_cpu(used->ring[last_used_idx].len);
+ }
+
+ if (unlikely(last_used >= num)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+ svq->vdev->name, last_used);
+ return -1;
+ }
+
+ if (svq->batch_last.id == last_used) {
+ svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+ *len = svq->batch_last.len;
+ } else {
+ *len = svq->desc_state[last_used].in_bytes;
+ }
+
+ svq->last_used += svq->desc_state[last_used].ndescs;
+ svq->last_used_idx++;
+ return last_used;
+}
+
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
uint16_t num, uint16_t i)
{
@@ -474,8 +553,8 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
}
G_GNUC_WARN_UNUSED_RESULT
-static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
- uint16_t id)
+static VirtQueueElement *vhost_svq_detach_buf_split(VhostShadowVirtqueue *svq,
+ uint16_t id)
{
uint16_t num = svq->desc_state[id].ndescs;
uint16_t last_used_chain = vhost_svq_last_desc_of_chain(svq, num, id);
@@ -486,6 +565,33 @@ static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
return g_steal_pointer(&svq->desc_state[id].elem);
}
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf_split_in_order(
+ VhostShadowVirtqueue *svq,
+ uint16_t id)
+{
+ return g_steal_pointer(&svq->desc_state[id].elem);
+}
+
+/*
+ * Return the descriptor id (and the chain of ids) to the free list
+ *
+ * @svq: Shadow Virtqueue
+ * @id: Id of the buffer to return.
+ *
+ * Return the element associated to the buffer if any.
+ */
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
+ uint16_t id)
+{
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ return vhost_svq_detach_buf_split_in_order(svq, id);
+ } else {
+ return vhost_svq_detach_buf_split(svq, id);
+ }
+}
+
G_GNUC_WARN_UNUSED_RESULT
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
uint32_t *len)
@@ -498,7 +604,18 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
/* Only get used array entries after they have been exposed by dev */
smp_rmb();
- last_used = vhost_svq_get_last_used_split(svq, len);
+
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ int32_t r;
+ r = vhost_svq_get_last_used_split_in_order(svq, len);
+ if (r < 0) {
+ return NULL;
+ }
+
+ last_used = r;
+ } else {
+ last_used = vhost_svq_get_last_used_split(svq, len);
+ }
if (unlikely(last_used >= svq->vring.num)) {
qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
@@ -726,6 +843,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->next_guest_avail_elem = NULL;
svq->shadow_avail_idx = 0;
svq->shadow_used_idx = 0;
+ memset(&svq->batch_last, 0, sizeof(svq->batch_last));
+ svq->last_used = 0;
svq->last_used_idx = 0;
svq->vdev = vdev;
svq->vq = vq;
@@ -742,8 +861,12 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
-1, 0);
svq->desc_state = g_new0(SVQDescState, svq->vring.num);
- for (unsigned i = 0; i < svq->vring.num - 1; i++) {
- svq->desc_state[i].next = i + 1;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+ } else {
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+ svq->desc_state[i].next = i + 1;
+ }
}
}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index f52c33e65046..ec16a1e83858 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -24,11 +24,19 @@ typedef struct SVQDescState {
*/
unsigned int ndescs;
- /*
- * Backup next field for each descriptor so we can recover securely, not
- * needing to trust the device access.
- */
- uint16_t next;
+ union {
+ /*
+ * Total length of the available buffer that is writable by the device.
+ * Only used when VIRTIO_F_IN_ORDER has been negotiated.
+ */
+ uint32_t in_bytes;
+
+ /*
+ * Backup next field for each descriptor so we can recover securely, not
+ * needing to trust the device access. Only used without VIRTIO_F_IN_ORDER.
+ */
+ uint16_t next;
+ };
} SVQDescState;
typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
@@ -99,9 +107,25 @@ typedef struct VhostShadowVirtqueue {
/* Next head to expose to the device */
uint16_t shadow_avail_idx;
- /* Next free descriptor */
+ /*
+ * Next free descriptor.
+ *
+ * Without IN_ORDER free_head is used as a linked list head, and
+ * desc_state[id].next is the next element.
+ * With IN_ORDER free_head is the next available buffer index.
+ */
uint16_t free_head;
+ /*
+ * Last used element of the processing batch of used descriptors if
+ * IN_ORDER.
+ * If SVQ is not processing a batch of descriptors id is set to
+ * VIRTIO_RING_NOT_IN_BATCH (UINT16_MAX).
+ */
+ vring_used_elem_t batch_last;
+
+ /* Last used id if IN_ORDER and split vq */
+ uint16_t last_used;
+
/* Last seen used idx */
uint16_t shadow_used_idx;
--
2.53.0
next prev parent reply other threads:[~2026-03-04 17:37 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-04 17:35 [PATCH 0/7] Add VIRTIO_F_IN_ORDER support to vhost shadow virtqueue Eugenio Pérez
2026-03-04 17:35 ` [PATCH 1/7] virtio: Allow to fill a whole virtqueue in order Eugenio Pérez
2026-03-06 3:26 ` Jason Wang
2026-03-06 6:22 ` Eugenio Perez Martin
2026-03-09 3:16 ` Jason Wang
2026-03-09 6:19 ` Eugenio Perez Martin
2026-03-10 3:09 ` Jason Wang
2026-03-10 6:21 ` Eugenio Perez Martin
2026-03-11 14:42 ` Jonah Palmer
2026-03-04 17:35 ` [PATCH 2/7] vhost: move svq next desc array to descs state struct Eugenio Pérez
2026-03-09 8:57 ` Jason Wang
2026-03-04 17:35 ` [PATCH 3/7] vhost: factor out the descriptor next fetching Eugenio Pérez
2026-03-09 8:57 ` Jason Wang
2026-03-04 17:35 ` [PATCH 4/7] vhost: factor out the get of last used desc in SVQ Eugenio Pérez
[not found] ` <CACGkMEukuUcCuTUpYEG5bdWD9dnJDWh2w50vsdhEbF2E=rNsvA@mail.gmail.com>
[not found] ` <CAJaqyWcTav8BWcRio+w4LYsTtAJSvJBJdeLoTdDBYAmh_2jjLg@mail.gmail.com>
2026-03-10 3:07 ` Jason Wang
2026-03-04 17:35 ` [PATCH 5/7] vhost: factor out the detach buf logic " Eugenio Pérez
2026-03-09 8:57 ` Jason Wang
2026-03-09 9:43 ` Eugenio Perez Martin
2026-03-10 3:04 ` Jason Wang
2026-03-10 6:36 ` Eugenio Perez Martin
2026-03-04 17:35 ` Eugenio Pérez [this message]
2026-03-09 8:57 ` [PATCH 6/7] vhost: add in_order feature to shadow virtqueue Jason Wang
2026-03-04 17:35 ` [PATCH 7/7] vhost: accept in order feature flag Eugenio Pérez
2026-03-09 8:57 ` Jason Wang
2026-03-11 11:50 ` [PATCH 0/7] Add VIRTIO_F_IN_ORDER support to vhost shadow virtqueue Michael Tokarev
2026-03-11 12:24 ` Eugenio Perez Martin
2026-03-11 13:49 ` Michael Tokarev
2026-05-13 6:45 ` Michael Tokarev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260304173535.2702587-7-eperezma@redhat.com \
--to=eperezma@redhat.com \
--cc=dtatulea@nvidia.com \
--cc=jasowang@redhat.com \
--cc=jonah.palmer@oracle.com \
--cc=kdutta@redhat.com \
--cc=leiyang@redhat.com \
--cc=lulu@redhat.com \
--cc=lvivier@redhat.com \
--cc=mcoqueli@redhat.com \
--cc=mst@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-stable@nongnu.org \
--cc=sgarzare@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.