* [RFC 00/10] Add vduse live migration features
@ 2026-02-11 16:14 Eugenio Pérez
2026-02-11 16:14 ` [RFC 01/10] uapi: align VDUSE header for ASID Eugenio Pérez
` (9 more replies)
0 siblings, 10 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
This series introduces features to the VDUSE (vDPA Device in Userspace) driver
to support Live Migration.
Currently, DPDK does not support VDUSE devices live migration because the
driver lacks a mechanism to suspend the device and quiesce the rings to
initiate the switchover. This series implements the suspend operation to
address this limitation.
Furthermore, enabling Live Migration for devices with control virtqueue needs
two additional features. Both of them are included in this series.
* Address Spaces (ASID) support: This allows QEMU to isolate and intercept the
device's CVQ. By doing so, QEMU is able to migrate the device status
transparently, without requiring the device to support state save and
restore.
* QUEUE_ENABLE: This allows QEMU to control when the dataplane virtqueues are
enabled. This ensures the dataplane is started after the device
configuration has been fully restored via the CVQ.
Last but not least, it enables the VIRTIO_NET_F_STATUS feature. This allows the
device to signal the driver that it needs to send gratuitous ARP with
VIRTIO_NET_S_ANNOUNCE, reducing the Live Migration downtime.
Eugenio Pérez (1):
uapi: align VDUSE header for ASID
Maxime Coquelin (6):
vhost: introduce ASID support
vhost: add VDUSE API version negotiation
vhost: add virtqueues groups support to VDUSE
vhost: add ASID support to VDUSE IOTLB operations
vhost: claim VDUSE support for API version 1
vhost: add net status feature to VDUSE
Super User (3):
uapi: Align vduse.h for enable and suspend VDUSE messages
vhost: Support VDUSE QUEUE_READY feature
vhost: Support vduse suspend feature
kernel/linux/uapi/linux/vduse.h | 118 ++++++++++++++++-
lib/vhost/iotlb.c | 226 ++++++++++++++++++++------------
lib/vhost/iotlb.h | 14 +-
lib/vhost/vduse.c | 208 ++++++++++++++++++++++++++---
lib/vhost/vduse.h | 3 +-
lib/vhost/vhost.c | 16 +--
lib/vhost/vhost.h | 16 +--
lib/vhost/vhost_user.c | 11 +-
8 files changed, 475 insertions(+), 137 deletions(-)
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread
* [RFC 01/10] uapi: align VDUSE header for ASID
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 02/10] vhost: introduce ASID support Eugenio Pérez
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
Add all the ioctls and argument struct definitions so we can call them
in the next patches.
These headers are not in Linux master at the moment, but they're planned
for 7.0.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
kernel/linux/uapi/linux/vduse.h | 87 ++++++++++++++++++++++++++++++---
1 file changed, 81 insertions(+), 6 deletions(-)
diff --git a/kernel/linux/uapi/linux/vduse.h b/kernel/linux/uapi/linux/vduse.h
index f46269af349a..e19b3c0f51b5 100644
--- a/kernel/linux/uapi/linux/vduse.h
+++ b/kernel/linux/uapi/linux/vduse.h
@@ -10,6 +10,10 @@
#define VDUSE_API_VERSION 0
+/* VQ groups and ASID support */
+
+#define VDUSE_API_VERSION_1 1
+
/*
* Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
* This is used for future extension.
@@ -27,6 +31,8 @@
* @features: virtio features
* @vq_num: the number of virtqueues
* @vq_align: the allocation alignment of virtqueue's metadata
+ * @ngroups: number of vq groups that VDUSE device declares
+ * @nas: number of address spaces that VDUSE device declares
* @reserved: for future use, needs to be initialized to zero
* @config_size: the size of the configuration space
* @config: the buffer of the configuration space
@@ -41,7 +47,9 @@ struct vduse_dev_config {
__u64 features;
__u32 vq_num;
__u32 vq_align;
- __u32 reserved[13];
+ __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */
+ __u32 nas; /* if VDUSE_API_VERSION >= 1 */
+ __u32 reserved[11];
__u32 config_size;
__u8 config[];
};
@@ -118,14 +126,18 @@ struct vduse_config_data {
* struct vduse_vq_config - basic configuration of a virtqueue
* @index: virtqueue index
* @max_size: the max size of virtqueue
- * @reserved: for future use, needs to be initialized to zero
+ * @reserved1: for future use, needs to be initialized to zero
+ * @group: virtqueue group
+ * @reserved2: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
*/
struct vduse_vq_config {
__u32 index;
__u16 max_size;
- __u16 reserved[13];
+ __u16 reserved1;
+ __u32 group;
+ __u16 reserved2[10];
};
/*
@@ -156,6 +168,16 @@ struct vduse_vq_state_packed {
__u16 last_used_idx;
};
+/**
+ * struct vduse_vq_group_asid - virtqueue group ASID
+ * @group: Index of the virtqueue group
+ * @asid: Address space ID of the group
+ */
+struct vduse_vq_group_asid {
+ __u32 group;
+ __u32 asid;
+};
+
/**
* struct vduse_vq_info - information of a virtqueue
* @index: virtqueue index
@@ -215,6 +237,7 @@ struct vduse_vq_eventfd {
* @uaddr: start address of userspace memory, it must be aligned to page size
* @iova: start of the IOVA region
* @size: size of the IOVA region
+ * @asid: Address space ID of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
@@ -224,7 +247,8 @@ struct vduse_iova_umem {
__u64 uaddr;
__u64 iova;
__u64 size;
- __u64 reserved[3];
+ __u32 asid;
+ __u32 reserved[5];
};
/* Register userspace memory for IOVA regions */
@@ -237,7 +261,8 @@ struct vduse_iova_umem {
* struct vduse_iova_info - information of one IOVA region
* @start: start of the IOVA region
* @last: last of the IOVA region
- * @capability: capability of the IOVA regsion
+ * @capability: capability of the IOVA region
+ * @asid: Address space ID of the IOVA region, only if device API version >= 1
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of
@@ -248,7 +273,8 @@ struct vduse_iova_info {
__u64 last;
#define VDUSE_IOVA_CAP_UMEM (1 << 0)
__u64 capability;
- __u64 reserved[3];
+ __u32 asid; /* Only if device API version >= 1 */
+ __u32 reserved[5];
};
/*
@@ -257,6 +283,32 @@ struct vduse_iova_info {
*/
#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
+/**
+ * struct vduse_iotlb_entry_v2 - entry of IOTLB to describe one IOVA region
+ *
+ * @v1: the original vduse_iotlb_entry
+ * @asid: address space ID of the IOVA region
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region.
+ */
+struct vduse_iotlb_entry_v2 {
+ __u64 offset;
+ __u64 start;
+ __u64 last;
+ __u8 perm;
+ __u8 padding[7];
+ __u32 asid;
+ __u32 reserved[11];
+};
+
+/*
+ * Same as VDUSE_IOTLB_GET_FD but with vduse_iotlb_entry_v2 argument that
+ * support extra fields.
+ */
+#define VDUSE_IOTLB_GET_FD2 _IOWR(VDUSE_BASE, 0x1b, struct vduse_iotlb_entry_v2)
+
+
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
/**
@@ -265,11 +317,14 @@ struct vduse_iova_info {
* @VDUSE_SET_STATUS: set the device status
* @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
* specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
+ * @VDUSE_SET_VQ_GROUP_ASID: Notify userspace to update the address space of a
+ * virtqueue group.
*/
enum vduse_req_type {
VDUSE_GET_VQ_STATE,
VDUSE_SET_STATUS,
VDUSE_UPDATE_IOTLB,
+ VDUSE_SET_VQ_GROUP_ASID,
};
/**
@@ -304,6 +359,19 @@ struct vduse_iova_range {
__u64 last;
};
+/**
+ * struct vduse_iova_range_v2 - IOVA range [start, last] if API_VERSION >= 1
+ * @start: start of the IOVA range
+ * @last: last of the IOVA range
+ * @asid: address space ID of the IOVA range
+ */
+struct vduse_iova_range_v2 {
+ __u64 start;
+ __u64 last;
+ __u32 asid;
+ __u32 padding;
+};
+
/**
* struct vduse_dev_request - control request
* @type: request type
@@ -312,6 +380,8 @@ struct vduse_iova_range {
* @vq_state: virtqueue state, only index field is available
* @s: device status
* @iova: IOVA range for updating
+ * @iova_v2: IOVA range for updating if API_VERSION >= 1
+ * @vq_group_asid: ASID of a virtqueue group
* @padding: padding
*
* Structure used by read(2) on /dev/vduse/$NAME.
@@ -324,6 +394,11 @@ struct vduse_dev_request {
struct vduse_vq_state vq_state;
struct vduse_dev_status s;
struct vduse_iova_range iova;
+ /* Following members but padding exist only if vduse api
+ * version >= 1
+ */
+ struct vduse_iova_range_v2 iova_v2;
+ struct vduse_vq_group_asid vq_group_asid;
__u32 padding[32];
};
};
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 02/10] vhost: introduce ASID support
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
2026-02-11 16:14 ` [RFC 01/10] uapi: align VDUSE header for ASID Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 03/10] vhost: add VDUSE API version negotiation Eugenio Pérez
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/iotlb.c | 226 +++++++++++++++++++++++++----------------
lib/vhost/iotlb.h | 14 +--
lib/vhost/vduse.c | 11 +-
lib/vhost/vhost.c | 16 +--
lib/vhost/vhost.h | 13 +--
lib/vhost/vhost_user.c | 11 +-
6 files changed, 171 insertions(+), 120 deletions(-)
diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
index f2c275a7d77e..112d3d0e359b 100644
--- a/lib/vhost/iotlb.c
+++ b/lib/vhost/iotlb.c
@@ -11,6 +11,16 @@
#include "iotlb.h"
#include "vhost.h"
+struct iotlb {
+ rte_rwlock_t pending_lock;
+ struct vhost_iotlb_entry *pool;
+ TAILQ_HEAD(, vhost_iotlb_entry) list;
+ TAILQ_HEAD(, vhost_iotlb_entry) pending_list;
+ int cache_nr;
+ rte_spinlock_t free_lock;
+ SLIST_HEAD(, vhost_iotlb_entry) free_list;
+};
+
struct vhost_iotlb_entry {
TAILQ_ENTRY(vhost_iotlb_entry) next;
SLIST_ENTRY(vhost_iotlb_entry) next_free;
@@ -85,78 +95,78 @@ vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct vhost_iotlb_entry *no
}
static struct vhost_iotlb_entry *
-vhost_user_iotlb_pool_get(struct virtio_net *dev)
+vhost_user_iotlb_pool_get(struct virtio_net *dev, int asid)
{
struct vhost_iotlb_entry *node;
- rte_spinlock_lock(&dev->iotlb_free_lock);
- node = SLIST_FIRST(&dev->iotlb_free_list);
+ rte_spinlock_lock(&dev->iotlb[asid]->free_lock);
+ node = SLIST_FIRST(&dev->iotlb[asid]->free_list);
if (node != NULL)
- SLIST_REMOVE_HEAD(&dev->iotlb_free_list, next_free);
- rte_spinlock_unlock(&dev->iotlb_free_lock);
+ SLIST_REMOVE_HEAD(&dev->iotlb[asid]->free_list, next_free);
+ rte_spinlock_unlock(&dev->iotlb[asid]->free_lock);
return node;
}
static void
-vhost_user_iotlb_pool_put(struct virtio_net *dev, struct vhost_iotlb_entry *node)
+vhost_user_iotlb_pool_put(struct virtio_net *dev, int asid, struct vhost_iotlb_entry *node)
{
- rte_spinlock_lock(&dev->iotlb_free_lock);
- SLIST_INSERT_HEAD(&dev->iotlb_free_list, node, next_free);
- rte_spinlock_unlock(&dev->iotlb_free_lock);
+ rte_spinlock_lock(&dev->iotlb[asid]->free_lock);
+ SLIST_INSERT_HEAD(&dev->iotlb[asid]->free_list, node, next_free);
+ rte_spinlock_unlock(&dev->iotlb[asid]->free_lock);
}
static void
-vhost_user_iotlb_cache_random_evict(struct virtio_net *dev);
+vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, int asid);
static void
-vhost_user_iotlb_pending_remove_all(struct virtio_net *dev)
+vhost_user_iotlb_pending_remove_all(struct virtio_net *dev, int asid)
{
struct vhost_iotlb_entry *node, *temp_node;
- rte_rwlock_write_lock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_lock(&dev->iotlb[asid]->pending_lock);
- RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next, temp_node) {
- TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
- vhost_user_iotlb_pool_put(dev, node);
+ RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb[asid]->pending_list, next, temp_node) {
+ TAILQ_REMOVE(&dev->iotlb[asid]->pending_list, node, next);
+ vhost_user_iotlb_pool_put(dev, asid, node);
}
- rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_unlock(&dev->iotlb[asid]->pending_lock);
}
bool
-vhost_user_iotlb_pending_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+vhost_user_iotlb_pending_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm)
{
struct vhost_iotlb_entry *node;
bool found = false;
- rte_rwlock_read_lock(&dev->iotlb_pending_lock);
+ rte_rwlock_read_lock(&dev->iotlb[asid]->pending_lock);
- TAILQ_FOREACH(node, &dev->iotlb_pending_list, next) {
+ TAILQ_FOREACH(node, &dev->iotlb[asid]->pending_list, next) {
if ((node->iova == iova) && (node->perm == perm)) {
found = true;
break;
}
}
- rte_rwlock_read_unlock(&dev->iotlb_pending_lock);
+ rte_rwlock_read_unlock(&dev->iotlb[asid]->pending_lock);
return found;
}
void
-vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+vhost_user_iotlb_pending_insert(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm)
{
struct vhost_iotlb_entry *node;
- node = vhost_user_iotlb_pool_get(dev);
+ node = vhost_user_iotlb_pool_get(dev, asid);
if (node == NULL) {
VHOST_CONFIG_LOG(dev->ifname, DEBUG,
"IOTLB pool empty, clear entries for pending insertion");
- if (!TAILQ_EMPTY(&dev->iotlb_pending_list))
- vhost_user_iotlb_pending_remove_all(dev);
+ if (!TAILQ_EMPTY(&dev->iotlb[asid]->pending_list))
+ vhost_user_iotlb_pending_remove_all(dev, asid);
else
- vhost_user_iotlb_cache_random_evict(dev);
- node = vhost_user_iotlb_pool_get(dev);
+ vhost_user_iotlb_cache_random_evict(dev, asid);
+ node = vhost_user_iotlb_pool_get(dev, asid);
if (node == NULL) {
VHOST_CONFIG_LOG(dev->ifname, ERR,
"IOTLB pool still empty, pending insertion failure");
@@ -167,21 +177,22 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t p
node->iova = iova;
node->perm = perm;
- rte_rwlock_write_lock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_lock(&dev->iotlb[asid]->pending_lock);
- TAILQ_INSERT_TAIL(&dev->iotlb_pending_list, node, next);
+ TAILQ_INSERT_TAIL(&dev->iotlb[asid]->pending_list, node, next);
- rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_unlock(&dev->iotlb[asid]->pending_lock);
}
void
-vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova, uint64_t size, uint8_t perm)
+vhost_user_iotlb_pending_remove(struct virtio_net *dev, int asid,
+ uint64_t iova, uint64_t size, uint8_t perm)
{
struct vhost_iotlb_entry *node, *temp_node;
- rte_rwlock_write_lock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_lock(&dev->iotlb[asid]->pending_lock);
- RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next,
+ RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb[asid]->pending_list, next,
temp_node) {
if (node->iova < iova)
continue;
@@ -189,53 +200,53 @@ vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova, uint64_t
continue;
if ((node->perm & perm) != node->perm)
continue;
- TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
- vhost_user_iotlb_pool_put(dev, node);
+ TAILQ_REMOVE(&dev->iotlb[asid]->pending_list, node, next);
+ vhost_user_iotlb_pool_put(dev, asid, node);
}
- rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
+ rte_rwlock_write_unlock(&dev->iotlb[asid]->pending_lock);
}
static void
-vhost_user_iotlb_cache_remove_all(struct virtio_net *dev)
+vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, int asid)
{
struct vhost_iotlb_entry *node, *temp_node;
vhost_user_iotlb_wr_lock_all(dev);
- RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
+ RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb[asid]->list, next, temp_node) {
vhost_user_iotlb_clear_dump(dev, node, NULL, NULL);
- TAILQ_REMOVE(&dev->iotlb_list, node, next);
+ TAILQ_REMOVE(&dev->iotlb[asid]->list, node, next);
vhost_user_iotlb_remove_notify(dev, node);
- vhost_user_iotlb_pool_put(dev, node);
+ vhost_user_iotlb_pool_put(dev, asid, node);
}
- dev->iotlb_cache_nr = 0;
+ dev->iotlb[asid]->cache_nr = 0;
vhost_user_iotlb_wr_unlock_all(dev);
}
static void
-vhost_user_iotlb_cache_random_evict(struct virtio_net *dev)
+vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, int asid)
{
struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
int entry_idx;
vhost_user_iotlb_wr_lock_all(dev);
- entry_idx = rte_rand() % dev->iotlb_cache_nr;
+ entry_idx = rte_rand() % dev->iotlb[asid]->cache_nr;
- RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
+ RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb[asid]->list, next, temp_node) {
if (!entry_idx) {
struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);
vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);
- TAILQ_REMOVE(&dev->iotlb_list, node, next);
+ TAILQ_REMOVE(&dev->iotlb[asid]->list, node, next);
vhost_user_iotlb_remove_notify(dev, node);
- vhost_user_iotlb_pool_put(dev, node);
- dev->iotlb_cache_nr--;
+ vhost_user_iotlb_pool_put(dev, asid, node);
+ dev->iotlb[asid]->cache_nr--;
break;
}
prev_node = node;
@@ -246,20 +257,20 @@ vhost_user_iotlb_cache_random_evict(struct virtio_net *dev)
}
void
-vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t uaddr,
+vhost_user_iotlb_cache_insert(struct virtio_net *dev, int asid, uint64_t iova, uint64_t uaddr,
uint64_t uoffset, uint64_t size, uint64_t page_size, uint8_t perm)
{
struct vhost_iotlb_entry *node, *new_node;
- new_node = vhost_user_iotlb_pool_get(dev);
+ new_node = vhost_user_iotlb_pool_get(dev, asid);
if (new_node == NULL) {
VHOST_CONFIG_LOG(dev->ifname, DEBUG,
"IOTLB pool empty, clear entries for cache insertion");
- if (!TAILQ_EMPTY(&dev->iotlb_list))
- vhost_user_iotlb_cache_random_evict(dev);
+ if (!TAILQ_EMPTY(&dev->iotlb[asid]->list))
+ vhost_user_iotlb_cache_random_evict(dev, asid);
else
- vhost_user_iotlb_pending_remove_all(dev);
- new_node = vhost_user_iotlb_pool_get(dev);
+ vhost_user_iotlb_pending_remove_all(dev, asid);
+ new_node = vhost_user_iotlb_pool_get(dev, asid);
if (new_node == NULL) {
VHOST_CONFIG_LOG(dev->ifname, ERR,
"IOTLB pool still empty, cache insertion failed");
@@ -276,36 +287,36 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t ua
vhost_user_iotlb_wr_lock_all(dev);
- TAILQ_FOREACH(node, &dev->iotlb_list, next) {
+ TAILQ_FOREACH(node, &dev->iotlb[asid]->list, next) {
/*
* Entries must be invalidated before being updated.
* So if iova already in list, assume identical.
*/
if (node->iova == new_node->iova) {
- vhost_user_iotlb_pool_put(dev, new_node);
+ vhost_user_iotlb_pool_put(dev, asid, new_node);
goto unlock;
} else if (node->iova > new_node->iova) {
vhost_user_iotlb_set_dump(dev, new_node);
TAILQ_INSERT_BEFORE(node, new_node, next);
- dev->iotlb_cache_nr++;
+ dev->iotlb[asid]->cache_nr++;
goto unlock;
}
}
vhost_user_iotlb_set_dump(dev, new_node);
- TAILQ_INSERT_TAIL(&dev->iotlb_list, new_node, next);
- dev->iotlb_cache_nr++;
+ TAILQ_INSERT_TAIL(&dev->iotlb[asid]->list, new_node, next);
+ dev->iotlb[asid]->cache_nr++;
unlock:
- vhost_user_iotlb_pending_remove(dev, iova, size, perm);
+ vhost_user_iotlb_pending_remove(dev, asid, iova, size, perm);
vhost_user_iotlb_wr_unlock_all(dev);
}
void
-vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t size)
+vhost_user_iotlb_cache_remove(struct virtio_net *dev, int asid, uint64_t iova, uint64_t size)
{
struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
@@ -314,7 +325,7 @@ vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t si
vhost_user_iotlb_wr_lock_all(dev);
- RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
+ RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb[asid]->list, next, temp_node) {
/* Sorted list */
if (unlikely(iova + size < node->iova))
break;
@@ -324,10 +335,10 @@ vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t si
vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);
- TAILQ_REMOVE(&dev->iotlb_list, node, next);
+ TAILQ_REMOVE(&dev->iotlb[asid]->list, node, next);
vhost_user_iotlb_remove_notify(dev, node);
- vhost_user_iotlb_pool_put(dev, node);
- dev->iotlb_cache_nr--;
+ vhost_user_iotlb_pool_put(dev, asid, node);
+ dev->iotlb[asid]->cache_nr--;
} else {
prev_node = node;
}
@@ -337,7 +348,8 @@ vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t si
}
uint64_t
-vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *size, uint8_t perm)
+vhost_user_iotlb_cache_find(struct virtio_net *dev, int asid,
+ uint64_t iova, uint64_t *size, uint8_t perm)
{
struct vhost_iotlb_entry *node;
uint64_t offset, vva = 0, mapped = 0;
@@ -345,7 +357,7 @@ vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *siz
if (unlikely(!*size))
goto out;
- TAILQ_FOREACH(node, &dev->iotlb_list, next) {
+ TAILQ_FOREACH(node, &dev->iotlb[asid]->list, next) {
/* List sorted by iova */
if (unlikely(iova < node->iova))
break;
@@ -378,25 +390,28 @@ vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *siz
}
void
-vhost_user_iotlb_flush_all(struct virtio_net *dev)
+vhost_user_iotlb_flush_all(struct virtio_net *dev, int asid)
{
- vhost_user_iotlb_cache_remove_all(dev);
- vhost_user_iotlb_pending_remove_all(dev);
+ vhost_user_iotlb_cache_remove_all(dev, asid);
+ vhost_user_iotlb_pending_remove_all(dev, asid);
}
-int
-vhost_user_iotlb_init(struct virtio_net *dev)
+static int
+vhost_user_iotlb_init_one(struct virtio_net *dev, int asid)
{
unsigned int i;
int socket = 0;
- if (dev->iotlb_pool) {
- /*
- * The cache has already been initialized,
- * just drop all cached and pending entries.
- */
- vhost_user_iotlb_flush_all(dev);
- rte_free(dev->iotlb_pool);
+ if (dev->iotlb[asid]) {
+ if (dev->iotlb[asid]->pool) {
+ /*
+ * The cache has already been initialized,
+ * just drop all cached and pending entries.
+ */
+ vhost_user_iotlb_flush_all(dev, asid);
+ rte_free(dev->iotlb[asid]->pool);
+ }
+ rte_free(dev->iotlb[asid]);
}
#ifdef RTE_LIBRTE_VHOST_NUMA
@@ -404,31 +419,68 @@ vhost_user_iotlb_init(struct virtio_net *dev)
socket = 0;
#endif
- rte_spinlock_init(&dev->iotlb_free_lock);
- rte_rwlock_init(&dev->iotlb_pending_lock);
+ dev->iotlb[asid] = rte_malloc_socket("iotlb", sizeof(struct iotlb), 0, socket);
+ if (!dev->iotlb[asid]) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to allocate IOTLB");
+ return -1;
+ }
+
+ rte_spinlock_init(&dev->iotlb[asid]->free_lock);
+ rte_rwlock_init(&dev->iotlb[asid]->pending_lock);
- SLIST_INIT(&dev->iotlb_free_list);
- TAILQ_INIT(&dev->iotlb_list);
- TAILQ_INIT(&dev->iotlb_pending_list);
+ SLIST_INIT(&dev->iotlb[asid]->free_list);
+ TAILQ_INIT(&dev->iotlb[asid]->list);
+ TAILQ_INIT(&dev->iotlb[asid]->pending_list);
if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
- dev->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
+ dev->iotlb[asid]->pool = rte_calloc_socket("iotlb_pool", IOTLB_CACHE_SIZE,
sizeof(struct vhost_iotlb_entry), 0, socket);
- if (!dev->iotlb_pool) {
+ if (!dev->iotlb[asid]->pool) {
VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to create IOTLB cache pool");
return -1;
}
for (i = 0; i < IOTLB_CACHE_SIZE; i++)
- vhost_user_iotlb_pool_put(dev, &dev->iotlb_pool[i]);
+ vhost_user_iotlb_pool_put(dev, asid, &dev->iotlb[asid]->pool[i]);
}
- dev->iotlb_cache_nr = 0;
+ dev->iotlb[asid]->cache_nr = 0;
return 0;
}
+int
+vhost_user_iotlb_init(struct virtio_net *dev)
+{
+ int i;
+
+ for (i = 0; i < IOTLB_MAX_ASID; i++)
+ if (vhost_user_iotlb_init_one(dev, i) < 0)
+ goto fail;
+
+ return 0;
+fail:
+ while (i--)
+ {
+ rte_free(dev->iotlb[i]->pool);
+ dev->iotlb[i]->pool = NULL;
+ }
+
+ return -1;
+}
+
void
vhost_user_iotlb_destroy(struct virtio_net *dev)
{
- rte_free(dev->iotlb_pool);
+ int i;
+
+ for (i = 0; i < IOTLB_MAX_ASID; i++)
+ {
+ if (dev->iotlb[i]) {
+ rte_free(dev->iotlb[i]->pool);
+ dev->iotlb[i]->pool = NULL;
+
+ rte_free(dev->iotlb[i]);
+ dev->iotlb[i] = NULL;
+ }
+ }
}
diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h
index 72232b0dcf08..52963d6c4de0 100644
--- a/lib/vhost/iotlb.h
+++ b/lib/vhost/iotlb.h
@@ -57,16 +57,16 @@ vhost_user_iotlb_wr_unlock_all(struct virtio_net *dev)
rte_rwlock_write_unlock(&dev->virtqueue[i]->iotlb_lock);
}
-void vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t uaddr,
+void vhost_user_iotlb_cache_insert(struct virtio_net *dev, int asid, uint64_t iova, uint64_t uaddr,
uint64_t uoffset, uint64_t size, uint64_t page_size, uint8_t perm);
-void vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t size);
-uint64_t vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova,
+void vhost_user_iotlb_cache_remove(struct virtio_net *dev, int asid, uint64_t iova, uint64_t size);
+uint64_t vhost_user_iotlb_cache_find(struct virtio_net *dev, int asid, uint64_t iova,
uint64_t *size, uint8_t perm);
-bool vhost_user_iotlb_pending_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
-void vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t perm);
-void vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova,
+bool vhost_user_iotlb_pending_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm);
+void vhost_user_iotlb_pending_insert(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm);
+void vhost_user_iotlb_pending_remove(struct virtio_net *dev, int asid, uint64_t iova,
uint64_t size, uint8_t perm);
-void vhost_user_iotlb_flush_all(struct virtio_net *dev);
+void vhost_user_iotlb_flush_all(struct virtio_net *dev, int asid);
int vhost_user_iotlb_init(struct virtio_net *dev);
void vhost_user_iotlb_destroy(struct virtio_net *dev);
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 0b5d158feeb9..49f2f23b9703 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -57,7 +57,7 @@ vduse_iotlb_remove_notify(uint64_t addr, uint64_t offset, uint64_t size)
}
static int
-vduse_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm __rte_unused)
+vduse_iotlb_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm __rte_unused)
{
struct vduse_iotlb_entry entry;
uint64_t size, page_size;
@@ -102,7 +102,7 @@ vduse_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm __rte_unuse
}
page_size = (uint64_t)stat.st_blksize;
- vhost_user_iotlb_cache_insert(dev, entry.start, (uint64_t)(uintptr_t)mmap_addr,
+ vhost_user_iotlb_cache_insert(dev, asid, entry.start, (uint64_t)(uintptr_t)mmap_addr,
entry.offset, size, page_size, entry.perm);
ret = 0;
@@ -398,7 +398,8 @@ vduse_device_stop(struct virtio_net *dev)
for (i = 0; i < dev->nr_vring; i++)
vduse_vring_cleanup(dev, i);
- vhost_user_iotlb_flush_all(dev);
+ for (i = 0; i < IOTLB_MAX_ASID; i++)
+ vhost_user_iotlb_flush_all(dev, i);
}
static void
@@ -445,8 +446,8 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
case VDUSE_UPDATE_IOTLB:
VHOST_CONFIG_LOG(dev->ifname, INFO, "\tIOVA range: %" PRIx64 " - %" PRIx64,
(uint64_t)req.iova.start, (uint64_t)req.iova.last);
- vhost_user_iotlb_cache_remove(dev, req.iova.start,
- req.iova.last - req.iova.start + 1);
+ vhost_user_iotlb_cache_remove(dev, 0, req.iova.start,
+ req.iova.last - req.iova.start + 1); /* ToDo: use ASID once API available, using 0 for now */
resp.result = VDUSE_REQ_RESULT_OK;
break;
default:
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 7e68b2c3be92..cb3af28671cc 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -62,9 +62,9 @@ static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
#define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
static int
-vhost_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+vhost_iotlb_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm)
{
- return dev->backend_ops->iotlb_miss(dev, iova, perm);
+ return dev->backend_ops->iotlb_miss(dev, asid, iova, perm);
}
uint64_t
@@ -78,7 +78,7 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
tmp_size = *size;
- vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
+ vva = vhost_user_iotlb_cache_find(dev, vq->asid, iova, &tmp_size, perm);
if (tmp_size == *size) {
if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
vq->stats.iotlb_hits++;
@@ -90,7 +90,7 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
iova += tmp_size;
- if (!vhost_user_iotlb_pending_miss(dev, iova, perm)) {
+ if (!vhost_user_iotlb_pending_miss(dev, vq->asid, iova, perm)) {
/*
* iotlb_lock is read-locked for a full burst,
* but it only protects the iotlb cache.
@@ -100,12 +100,12 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
*/
vhost_user_iotlb_rd_unlock(vq);
- vhost_user_iotlb_pending_insert(dev, iova, perm);
- if (vhost_iotlb_miss(dev, iova, perm)) {
+ vhost_user_iotlb_pending_insert(dev, vq->asid, iova, perm);
+ if (vhost_iotlb_miss(dev, vq->asid, iova, perm)) {
VHOST_DATA_LOG(dev->ifname, ERR,
"IOTLB miss req failed for IOVA 0x%" PRIx64,
iova);
- vhost_user_iotlb_pending_remove(dev, iova, 1, perm);
+ vhost_user_iotlb_pending_remove(dev, vq->asid, iova, 1, perm);
}
vhost_user_iotlb_rd_lock(vq);
@@ -113,7 +113,7 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
tmp_size = *size;
/* Retry in case of VDUSE, as it is synchronous */
- vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
+ vva = vhost_user_iotlb_cache_find(dev, vq->asid, iova, &tmp_size, perm);
if (tmp_size == *size)
return vva;
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index ee61f7415ee3..fef493016df5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -85,7 +85,7 @@ struct vhost_virtqueue;
typedef void (*vhost_iotlb_remove_notify)(uint64_t addr, uint64_t off, uint64_t size);
-typedef int (*vhost_iotlb_miss_cb)(struct virtio_net *dev, uint64_t iova, uint8_t perm);
+typedef int (*vhost_iotlb_miss_cb)(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm);
typedef int (*vhost_vring_inject_irq_cb)(struct virtio_net *dev, struct vhost_virtqueue *vq);
/**
@@ -326,6 +326,7 @@ struct __rte_cache_aligned vhost_virtqueue {
uint16_t batch_copy_nb_elems;
struct batch_copy_elem *batch_copy_elems;
int numa_node;
+ int asid;
bool used_wrap_counter;
bool avail_wrap_counter;
@@ -483,6 +484,8 @@ struct inflight_mem_info {
uint64_t size;
};
+#define IOTLB_MAX_ASID 2
+
/**
* Device structure contains all configuration information relating
* to the device.
@@ -504,13 +507,7 @@ struct __rte_cache_aligned virtio_net {
int linearbuf;
struct vhost_virtqueue *virtqueue[VHOST_MAX_VRING];
- rte_rwlock_t iotlb_pending_lock;
- struct vhost_iotlb_entry *iotlb_pool;
- TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
- TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
- int iotlb_cache_nr;
- rte_spinlock_t iotlb_free_lock;
- SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list;
+ struct iotlb *iotlb[IOTLB_MAX_ASID];
struct inflight_mem_info *inflight_info;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 4bfb13fb98ce..5581da12d21c 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1433,7 +1433,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
/* Flush IOTLB cache as previous HVAs are now invalid */
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_flush_all(dev);
+ for (i = 0; i < IOTLB_MAX_ASID; i++)
+ vhost_user_iotlb_flush_all(dev, i);
free_mem_region(dev);
rte_free(dev->mem);
@@ -2267,7 +2268,7 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
ctx->msg.size = sizeof(ctx->msg.payload.state);
ctx->fd_num = 0;
- vhost_user_iotlb_flush_all(dev);
+ vhost_user_iotlb_flush_all(dev, vq->asid);
rte_rwlock_write_lock(&vq->access_lock);
vring_invalidate(dev, vq);
@@ -2716,7 +2717,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
pg_sz = hua_to_alignment(dev->mem, (void *)(uintptr_t)vva);
- vhost_user_iotlb_cache_insert(dev, imsg->iova, vva, 0, len, pg_sz, imsg->perm);
+ vhost_user_iotlb_cache_insert(dev, 0, imsg->iova, vva, 0, len, pg_sz, imsg->perm);
for (i = 0; i < dev->nr_vring; i++) {
struct vhost_virtqueue *vq = dev->virtqueue[i];
@@ -2733,7 +2734,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
}
break;
case VHOST_IOTLB_INVALIDATE:
- vhost_user_iotlb_cache_remove(dev, imsg->iova, imsg->size);
+ vhost_user_iotlb_cache_remove(dev, 0, imsg->iova, imsg->size);
for (i = 0; i < dev->nr_vring; i++) {
struct vhost_virtqueue *vq = dev->virtqueue[i];
@@ -3326,7 +3327,7 @@ vhost_user_msg_handler(int vid, int fd)
}
static int
-vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+vhost_user_iotlb_miss(struct virtio_net *dev, int asid __rte_unused, uint64_t iova, uint8_t perm)
{
int ret;
struct vhu_msg_context ctx = {
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 03/10] vhost: add VDUSE API version negotiation
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
2026-02-11 16:14 ` [RFC 01/10] uapi: align VDUSE header for ASID Eugenio Pérez
2026-02-11 16:14 ` [RFC 02/10] vhost: introduce ASID support Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 04/10] vhost: add virtqueues groups support to VDUSE Eugenio Pérez
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
As a preliminary step to support the new VDUSE API version
introducing ASID support, this patch adds API version
negotiation to keep compatibility with older kernels.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/vduse.c | 14 ++++++++++++--
lib/vhost/vhost.h | 1 +
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 49f2f23b9703..fc52b4cd0703 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -25,7 +25,7 @@
#include "vhost.h"
#include "virtio_net_ctrl.h"
-#define VHOST_VDUSE_API_VERSION 0
+#define VHOST_VDUSE_API_VERSION 0ULL
#define VDUSE_CTRL_PATH "/dev/vduse/control"
struct vduse {
@@ -680,7 +680,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
uint32_t i, max_queue_pairs, total_queues;
struct virtio_net *dev;
struct virtio_net_config vnet_config = {{ 0 }};
- uint64_t ver = VHOST_VDUSE_API_VERSION;
+ uint64_t ver;
uint64_t features;
const char *name = path + strlen("/dev/vduse/");
bool reconnect = false;
@@ -700,6 +700,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
return -1;
}
+ if (ioctl(control_fd, VDUSE_GET_API_VERSION, &ver)) {
+ VHOST_CONFIG_LOG(name, ERR, "Failed to get API version: %s", strerror(errno));
+ ret = -1;
+ goto out_ctrl_close;
+ }
+
+ ver = RTE_MIN(ver, VHOST_VDUSE_API_VERSION);
+ VHOST_CONFIG_LOG(name, INFO, "Using VDUSE API version %" PRIu64 "", ver);
+
if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
VHOST_CONFIG_LOG(name, ERR, "Failed to set API version: %" PRIu64 ": %s",
ver, strerror(errno));
@@ -800,6 +809,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
strncpy(dev->ifname, path, IF_NAME_SZ - 1);
dev->vduse_ctrl_fd = control_fd;
dev->vduse_dev_fd = dev_fd;
+ dev->vduse_api_ver = ver;
ret = vduse_reconnect_log_map(dev, !reconnect);
if (ret < 0)
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index fef493016df5..50b09da5ecf7 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -532,6 +532,7 @@ struct __rte_cache_aligned virtio_net {
int postcopy_listening;
int vduse_ctrl_fd;
int vduse_dev_fd;
+ uint64_t vduse_api_ver;
struct vhost_virtqueue *cvq;
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 04/10] vhost: add virtqueues groups support to VDUSE
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (2 preceding siblings ...)
2026-02-11 16:14 ` [RFC 03/10] vhost: add VDUSE API version negotiation Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 05/10] vhost: add ASID support to VDUSE IOTLB operations Eugenio Pérez
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
VDUSE API version 1 introduces the notion of virtqueue
groups, which, once supported, enables the support of
multiple address spaces.
For VDUSE networking devices, we need two groups, one for
the datapath queues, and one for the control queue.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/vduse.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 42 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index fc52b4cd0703..bb3a4363baad 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -38,12 +38,24 @@ static const char * const vduse_reqs_str[] = {
"VDUSE_GET_VQ_STATE",
"VDUSE_SET_STATUS",
"VDUSE_UPDATE_IOTLB",
+ "VDUSE_SET_VQ_GROUP_ASID",
};
#define vduse_req_id_to_str(id) \
(id < RTE_DIM(vduse_reqs_str) ? \
vduse_reqs_str[id] : "Unknown")
+static uint64_t vduse_vq_to_group(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ if (dev->vduse_api_ver < 1)
+ return 0;
+
+ if (vq == dev->cvq)
+ return 1;
+
+ return 0;
+}
+
static int
vduse_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
@@ -271,6 +283,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
vq->size = 0;
vq->last_used_idx = 0;
vq->last_avail_idx = 0;
+ vq->asid = 0;
}
/*
@@ -410,6 +423,7 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
struct vduse_dev_response resp;
struct vhost_virtqueue *vq;
uint8_t old_status = dev->status;
+ uint32_t i;
int ret;
memset(&resp, 0, sizeof(resp));
@@ -450,6 +464,26 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
req.iova.last - req.iova.start + 1); /* ToDo: use ASID once API available, using 0 for now */
resp.result = VDUSE_REQ_RESULT_OK;
break;
+ case VDUSE_SET_VQ_GROUP_ASID:
+ if (dev->vduse_api_ver < 1) {
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+
+ VHOST_CONFIG_LOG(dev->ifname, INFO, "\tAssigning ASID %d to group %d",
+ req.vq_group_asid.asid, req.vq_group_asid.group);
+
+ for (i = 0; i < dev->nr_vring; i++) {
+ vq = dev->virtqueue[i];
+
+ if (vduse_vq_to_group(dev, vq) == req.vq_group_asid.group) {
+ vq->asid = req.vq_group_asid.asid;
+ VHOST_CONFIG_LOG(dev->ifname, INFO, "\t\tVQ %d gets ASID %d",
+ i, req.vq_group_asid.asid);
+ }
+ }
+ resp.result = VDUSE_REQ_RESULT_OK;
+ break;
default:
resp.result = VDUSE_REQ_RESULT_FAILED;
break;
@@ -760,6 +794,10 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
dev_config->features = features;
dev_config->vq_num = total_queues;
dev_config->vq_align = rte_mem_page_size();
+ if (ver >= 1) {
+ dev_config->ngroups = 2;
+ dev_config->nas = 2;
+ }
dev_config->config_size = sizeof(struct virtio_net_config);
memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
@@ -848,11 +886,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
vq = dev->virtqueue[i];
vq->reconnect_log = &dev->reconnect_log->vring[i];
+ if (i == max_queue_pairs * 2)
+ dev->cvq = vq;
+
if (reconnect)
continue;
vq_cfg.index = i;
vq_cfg.max_size = 1024;
+ vq_cfg.group = vduse_vq_to_group(dev, vq);
ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
if (ret) {
@@ -861,8 +903,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
}
}
- dev->cvq = dev->virtqueue[max_queue_pairs * 2];
-
ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler, NULL, dev);
if (ret) {
VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 05/10] vhost: add ASID support to VDUSE IOTLB operations
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (3 preceding siblings ...)
2026-02-11 16:14 ` [RFC 04/10] vhost: add virtqueues groups support to VDUSE Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 06/10] vhost: claim VDUSE support for API version 1 Eugenio Pérez
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Make use of the newly introduced address space ID when
calling Vhost IOTLB API.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
Eugenio Pérez changes:
* Use the new GET_FD2 ioctl.
* Adapt the entry.v1 accesses to new format
---
lib/vhost/vduse.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index bb3a4363baad..4e1920f9441e 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -71,7 +71,7 @@ vduse_iotlb_remove_notify(uint64_t addr, uint64_t offset, uint64_t size)
static int
vduse_iotlb_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm __rte_unused)
{
- struct vduse_iotlb_entry entry;
+ struct vduse_iotlb_entry_v2 entry = {};
uint64_t size, page_size;
struct stat stat;
void *mmap_addr;
@@ -79,8 +79,9 @@ vduse_iotlb_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm _
entry.start = iova;
entry.last = iova + 1;
+ entry.asid = asid;
- ret = ioctl(dev->vduse_dev_fd, VDUSE_IOTLB_GET_FD, &entry);
+ ret = ioctl(dev->vduse_dev_fd, VDUSE_IOTLB_GET_FD2, &entry);
if (ret < 0) {
VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get IOTLB entry for 0x%" PRIx64,
iova);
@@ -90,6 +91,7 @@ vduse_iotlb_miss(struct virtio_net *dev, int asid, uint64_t iova, uint8_t perm _
fd = ret;
VHOST_CONFIG_LOG(dev->ifname, DEBUG, "New IOTLB entry:");
+ VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tASID: %d", entry.asid);
VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tIOVA: %" PRIx64 " - %" PRIx64,
(uint64_t)entry.start, (uint64_t)entry.last);
VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\toffset: %" PRIx64, (uint64_t)entry.offset);
@@ -458,10 +460,24 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
resp.result = VDUSE_REQ_RESULT_OK;
break;
case VDUSE_UPDATE_IOTLB:
- VHOST_CONFIG_LOG(dev->ifname, INFO, "\tIOVA range: %" PRIx64 " - %" PRIx64,
- (uint64_t)req.iova.start, (uint64_t)req.iova.last);
- vhost_user_iotlb_cache_remove(dev, 0, req.iova.start,
- req.iova.last - req.iova.start + 1); /* ToDo: use ASID once API available, using 0 for now */
+ {
+ uint64_t start, last;
+ uint32_t asid;
+
+ if (dev->vduse_api_ver < 1) {
+ start = req.iova.start;
+ last = req.iova.last;
+ asid = 0;
+ } else {
+ start = req.iova_v2.start;
+ last = req.iova_v2.last;
+ asid = req.iova_v2.asid;
+ }
+
+ VHOST_CONFIG_LOG(dev->ifname, INFO, "\t(ASID %d) IOVA range: %" PRIx64 " - %" PRIx64,
+ asid, start, last);
+ vhost_user_iotlb_cache_remove(dev, asid, start, last - start + 1);
+ }
resp.result = VDUSE_REQ_RESULT_OK;
break;
case VDUSE_SET_VQ_GROUP_ASID:
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 06/10] vhost: claim VDUSE support for API version 1
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (4 preceding siblings ...)
2026-02-11 16:14 ` [RFC 05/10] vhost: add ASID support to VDUSE IOTLB operations Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 07/10] vhost: add net status feature to VDUSE Eugenio Pérez
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/vduse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 4e1920f9441e..80020d3d5413 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -25,7 +25,7 @@
#include "vhost.h"
#include "virtio_net_ctrl.h"
-#define VHOST_VDUSE_API_VERSION 0ULL
+#define VHOST_VDUSE_API_VERSION 1ULL
#define VDUSE_CTRL_PATH "/dev/vduse/control"
struct vduse {
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 07/10] vhost: add net status feature to VDUSE
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (5 preceding siblings ...)
2026-02-11 16:14 ` [RFC 06/10] vhost: claim VDUSE support for API version 1 Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 08/10] uapi: Align vduse.h for enable and suspend VDUSE messages Eugenio Pérez
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Enable the VIRTIO_NET_F_STATUS feature for VDUSE devices.
This allows the device to report link status (e.g.,
VIRTIO_NET_S_LINK_UP). It also allows the device to signal the driver
that it needs to send gratuitous ARP with VIRTIO_NET_S_ANNOUNCE.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/vduse.c | 32 ++++++++++++++++----------------
lib/vhost/vduse.h | 3 ++-
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 80020d3d5413..04f397bac8b5 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -460,24 +460,23 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
resp.result = VDUSE_REQ_RESULT_OK;
break;
case VDUSE_UPDATE_IOTLB:
- {
- uint64_t start, last;
- uint32_t asid;
-
- if (dev->vduse_api_ver < 1) {
- start = req.iova.start;
- last = req.iova.last;
- asid = 0;
- } else {
- start = req.iova_v2.start;
- last = req.iova_v2.last;
- asid = req.iova_v2.asid;
- }
+ uint64_t start, last;
+ uint32_t asid;
- VHOST_CONFIG_LOG(dev->ifname, INFO, "\t(ASID %d) IOVA range: %" PRIx64 " - %" PRIx64,
- asid, start, last);
- vhost_user_iotlb_cache_remove(dev, asid, start, last - start + 1);
+ if (dev->vduse_api_ver < 1) {
+ start = req.iova.start;
+ last = req.iova.last;
+ asid = 0;
+ } else {
+ start = req.iova_v2.start;
+ last = req.iova_v2.last;
+ asid = req.iova_v2.asid;
}
+
+ VHOST_CONFIG_LOG(dev->ifname, INFO, "\t(ASID %d) IOVA range: %" PRIx64 " - %" PRIx64,
+ asid, start, last);
+ vhost_user_iotlb_cache_remove(dev, asid, start, last - start + 1);
+
resp.result = VDUSE_REQ_RESULT_OK;
break;
case VDUSE_SET_VQ_GROUP_ASID:
@@ -801,6 +800,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
goto out_ctrl_close;
}
+ vnet_config.status = VIRTIO_NET_S_LINK_UP;
vnet_config.max_virtqueue_pairs = max_queue_pairs;
memset(dev_config, 0, sizeof(struct vduse_dev_config));
diff --git a/lib/vhost/vduse.h b/lib/vhost/vduse.h
index b2515bb9df76..d697f85be5cc 100644
--- a/lib/vhost/vduse.h
+++ b/lib/vhost/vduse.h
@@ -7,7 +7,8 @@
#include "vhost.h"
-#define VDUSE_NET_SUPPORTED_FEATURES VIRTIO_NET_SUPPORTED_FEATURES
+#define VDUSE_NET_SUPPORTED_FEATURES (VIRTIO_NET_SUPPORTED_FEATURES | \
+ (1ULL << VIRTIO_NET_F_STATUS))
int vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf);
int vduse_device_destroy(const char *path);
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 08/10] uapi: Align vduse.h for enable and suspend VDUSE messages
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (6 preceding siblings ...)
2026-02-11 16:14 ` [RFC 07/10] vhost: add net status feature to VDUSE Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 09/10] vhost: Support VDUSE QUEUE_READY feature Eugenio Pérez
2026-02-11 16:14 ` [RFC 10/10] vhost: Support vduse suspend feature Eugenio Pérez
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Super User <root@wsfd-advnetlab45.anl.eng.rdu2.dc.redhat.com>
This is a prerequisite for the next patches to use them.
These features are required to support live migration and proper
device initialization ordering.
These are not in maintainer's branch at the moment:
https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
https://lore.kernel.org/lkml/20260211120158.2501592-1-eperezma@redhat.com
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
kernel/linux/uapi/linux/vduse.h | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/kernel/linux/uapi/linux/vduse.h b/kernel/linux/uapi/linux/vduse.h
index e19b3c0f51b5..156446cdc712 100644
--- a/kernel/linux/uapi/linux/vduse.h
+++ b/kernel/linux/uapi/linux/vduse.h
@@ -14,6 +14,13 @@
#define VDUSE_API_VERSION_1 1
+/* Features support */
+
+#define VDUSE_API_VERSION_2 2
+
+/* The VDUSE instance expects a request for vq ready */
+#define VDUSE_F_QUEUE_READY 0
+
/*
* Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
* This is used for future extension.
@@ -33,6 +40,7 @@
* @vq_align: the allocation alignment of virtqueue's metadata
* @ngroups: number of vq groups that VDUSE device declares
* @nas: number of address spaces that VDUSE device declares
+ * @vduse_features: VDUSE features
* @reserved: for future use, needs to be initialized to zero
* @config_size: the size of the configuration space
* @config: the buffer of the configuration space
@@ -49,7 +57,8 @@ struct vduse_dev_config {
__u32 vq_align;
__u32 ngroups; /* if VDUSE_API_VERSION >= 1 */
__u32 nas; /* if VDUSE_API_VERSION >= 1 */
- __u32 reserved[11];
+ __u64 vduse_features;
+ __u32 reserved[9];
__u32 config_size;
__u8 config[];
};
@@ -63,6 +72,9 @@ struct vduse_dev_config {
*/
#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+/* Get the VDUSE supported features */
+#define VDUSE_GET_FEATURES _IOR(VDUSE_BASE, 0x04, __u64)
+
/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
/**
@@ -325,6 +337,7 @@ enum vduse_req_type {
VDUSE_SET_STATUS,
VDUSE_UPDATE_IOTLB,
VDUSE_SET_VQ_GROUP_ASID,
+ VDUSE_SET_VQ_READY,
};
/**
@@ -372,6 +385,15 @@ struct vduse_iova_range_v2 {
__u32 padding;
};
+/**
+ * struct vduse_vq_ready - Virtqueue ready request message
+ * @num: Virtqueue number
+ */
+struct vduse_vq_ready {
+ __u32 num;
+ __u32 ready;
+};
+
/**
* struct vduse_dev_request - control request
* @type: request type
@@ -382,6 +404,7 @@ struct vduse_iova_range_v2 {
* @iova: IOVA range for updating
* @iova_v2: IOVA range for updating if API_VERSION >= 1
* @vq_group_asid: ASID of a virtqueue group
+ * @vq_ready: Virtqueue ready request
* @padding: padding
*
* Structure used by read(2) on /dev/vduse/$NAME.
@@ -399,6 +422,10 @@ struct vduse_dev_request {
*/
struct vduse_iova_range_v2 iova_v2;
struct vduse_vq_group_asid vq_group_asid;
+
+ /* Only if VDUSE_F_QUEUE_READY is negotiated */
+ struct vduse_vq_ready vq_ready;
+
__u32 padding[32];
};
};
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 09/10] vhost: Support VDUSE QUEUE_READY feature
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (7 preceding siblings ...)
2026-02-11 16:14 ` [RFC 08/10] uapi: Align vduse.h for enable and suspend VDUSE messages Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
2026-02-11 16:14 ` [RFC 10/10] vhost: Support vduse suspend feature Eugenio Pérez
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Super User <root@wsfd-advnetlab45.anl.eng.rdu2.dc.redhat.com>
Add support for the VDUSE_F_QUEUE_READY feature.
In VDUSE, the dataplane is enabled only after the control virtqueue, so
the device is fully configured at the destination of a live migration
before the dataplane starts. This message signals the VDUSE device when
the dataplane queues should be enabled.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
lib/vhost/vduse.c | 79 +++++++++++++++++++++++++++++++++++++++++++++--
lib/vhost/vhost.h | 1 +
2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 04f397bac8b5..9c0801453605 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -25,7 +25,7 @@
#include "vhost.h"
#include "virtio_net_ctrl.h"
-#define VHOST_VDUSE_API_VERSION 1ULL
+#define VHOST_VDUSE_API_VERSION 2ULL
#define VDUSE_CTRL_PATH "/dev/vduse/control"
struct vduse {
@@ -39,12 +39,15 @@ static const char * const vduse_reqs_str[] = {
"VDUSE_SET_STATUS",
"VDUSE_UPDATE_IOTLB",
"VDUSE_SET_VQ_GROUP_ASID",
+ "VDUSE_SET_VQ_READY",
};
#define vduse_req_id_to_str(id) \
(id < RTE_DIM(vduse_reqs_str) ? \
vduse_reqs_str[id] : "Unknown")
+static const uint64_t supported_vduse_features = RTE_BIT64(VDUSE_F_QUEUE_READY);
+
static uint64_t vduse_vq_to_group(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
if (dev->vduse_api_ver < 1)
@@ -499,6 +502,48 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
}
resp.result = VDUSE_REQ_RESULT_OK;
break;
+ case VDUSE_SET_VQ_READY:
+ if (!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)) {
+ /*
+ * dev->notify_ops is NULL if !S_DRIVER_OK,
+ * vduse_device_start will check the queue readiness.
+ */
+ resp.result = VDUSE_REQ_RESULT_OK;
+ break;
+ }
+ if (dev->vduse_api_ver < 2 ||
+ !(dev->vduse_features & RTE_BIT64(VDUSE_F_QUEUE_READY))) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "Unexpected message ver %"PRIu64" ready feature %d",
+ dev->vduse_api_ver,
+ !!(dev->vduse_features &
+ RTE_BIT64(VDUSE_F_QUEUE_READY)));
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+
+ i = req.vq_ready.num;
+ vq = dev->virtqueue[i];
+ if (!dev->notify_ops || !dev->notify_ops->vring_state_changed) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "No ops->vring_state_changed");
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+
+ ret = dev->notify_ops->vring_state_changed(dev->vid, i,
+ req.vq_ready.ready);
+ VHOST_CONFIG_LOG(dev->ifname, INFO,
+ "\t\t VQ %d gets ready %d ok %d", i,
+ req.vq_ready.ready, ret);
+ if (ret != 0) {
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+
+ vq->enabled = req.vq_ready.ready;
+ resp.result = VDUSE_REQ_RESULT_OK;
+ break;
default:
resp.result = VDUSE_REQ_RESULT_FAILED;
break;
@@ -516,7 +561,8 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
/* Poll virtqueues ready states before starting device */
- ret = vduse_wait_for_virtqueues_ready(dev);
+ ret = dev->vduse_features & RTE_BIT64(VDUSE_F_QUEUE_READY) ? 0
+ : vduse_wait_for_virtqueues_ready(dev);
if (ret < 0) {
VHOST_CONFIG_LOG(dev->ifname, ERR,
"Failed to wait for virtqueues ready, aborting device start");
@@ -722,6 +768,27 @@ vduse_reconnect_start_device(struct virtio_net *dev)
return ret;
}
+/* If some error occurs just continue as if the kernel exposed no features */
+static uint64_t
+vduse_device_get_vduse_features(int control_fd, const char *log_name)
+{
+ uint64_t vduse_kernel_features;
+ int ret;
+
+ ret = ioctl(control_fd, VDUSE_GET_FEATURES, &vduse_kernel_features);
+ if (ret < 0) {
+ VHOST_CONFIG_LOG(log_name, ERR,
+ "Failed to get kernel VDUSE features: %d(%s)",
+ errno, strerror(errno));
+ return 0;
+ }
+
+ VHOST_CONFIG_LOG(log_name, DEBUG,
+ "Setting vhost kernel features: %lx",
+ vduse_kernel_features & supported_vduse_features);
+ return vduse_kernel_features & supported_vduse_features;
+}
+
int
vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf)
{
@@ -730,7 +797,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
struct virtio_net *dev;
struct virtio_net_config vnet_config = {{ 0 }};
uint64_t ver;
- uint64_t features;
+ uint64_t features, vduse_features = 0;
const char *name = path + strlen("/dev/vduse/");
bool reconnect = false;
@@ -814,6 +881,11 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
dev_config->ngroups = 2;
dev_config->nas = 2;
}
+
+ if (ver >= 2) {
+ vduse_features = vduse_device_get_vduse_features(control_fd, name);
+ dev_config->vduse_features = vduse_features;
+ }
dev_config->config_size = sizeof(struct virtio_net_config);
memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
@@ -864,6 +936,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
dev->vduse_ctrl_fd = control_fd;
dev->vduse_dev_fd = dev_fd;
dev->vduse_api_ver = ver;
+ dev->vduse_features = vduse_features;
ret = vduse_reconnect_log_map(dev, !reconnect);
if (ret < 0)
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 50b09da5ecf7..ce2e1a271c6f 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -533,6 +533,7 @@ struct __rte_cache_aligned virtio_net {
int vduse_ctrl_fd;
int vduse_dev_fd;
uint64_t vduse_api_ver;
+ uint64_t vduse_features;
struct vhost_virtqueue *cvq;
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC 10/10] vhost: Support vduse suspend feature
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
` (8 preceding siblings ...)
2026-02-11 16:14 ` [RFC 09/10] vhost: Support VDUSE QUEUE_READY feature Eugenio Pérez
@ 2026-02-11 16:14 ` Eugenio Pérez
9 siblings, 0 replies; 11+ messages in thread
From: Eugenio Pérez @ 2026-02-11 16:14 UTC (permalink / raw)
To: Maxime Coquelin; +Cc: jasowang, david.marchand, mst, dev, Yongji Xie, chenbox
From: Super User <root@wsfd-advnetlab45.anl.eng.rdu2.dc.redhat.com>
Add support for the VDUSE_F_SUSPEND feature.
The suspend feature allows the driver to stop the device from processing
the virtqueues. This ensures that the virtqueue state can be fetched
reliably during a live migration.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
kernel/linux/uapi/linux/vduse.h | 4 ++++
lib/vhost/vduse.c | 40 ++++++++++++++++++++++++++++++++-
lib/vhost/vhost.h | 1 +
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/kernel/linux/uapi/linux/vduse.h b/kernel/linux/uapi/linux/vduse.h
index 156446cdc712..ca510027cac2 100644
--- a/kernel/linux/uapi/linux/vduse.h
+++ b/kernel/linux/uapi/linux/vduse.h
@@ -21,6 +21,9 @@
/* The VDUSE instance expects a request for vq ready */
#define VDUSE_F_QUEUE_READY 0
+/* The VDUSE instance expects a request for suspend */
+#define VDUSE_F_SUSPEND 1
+
/*
* Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
* This is used for future extension.
@@ -338,6 +341,7 @@ enum vduse_req_type {
VDUSE_UPDATE_IOTLB,
VDUSE_SET_VQ_GROUP_ASID,
VDUSE_SET_VQ_READY,
+ VDUSE_SUSPEND,
};
/**
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 9c0801453605..2f421a06a2d2 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -46,7 +46,8 @@ static const char * const vduse_reqs_str[] = {
(id < RTE_DIM(vduse_reqs_str) ? \
vduse_reqs_str[id] : "Unknown")
-static const uint64_t supported_vduse_features = RTE_BIT64(VDUSE_F_QUEUE_READY);
+static const uint64_t supported_vduse_features =
+ RTE_BIT64(VDUSE_F_QUEUE_READY) | RTE_BIT64(VDUSE_F_SUSPEND);
static uint64_t vduse_vq_to_group(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
@@ -521,6 +522,12 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
resp.result = VDUSE_REQ_RESULT_FAILED;
break;
}
+ if (dev->vduse_suspended) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "SET_VQ_READY received on suspended device");
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
i = req.vq_ready.num;
vq = dev->virtqueue[i];
@@ -544,6 +551,37 @@ vduse_events_handler(int fd, void *arg, int *close __rte_unused)
vq->enabled = req.vq_ready.ready;
resp.result = VDUSE_REQ_RESULT_OK;
break;
+ case VDUSE_SUSPEND:
+ if (dev->vduse_api_ver < 2) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "Unexpected suspend message with ver %"PRIu64,
+ dev->vduse_api_ver);
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+ if (!(dev->vduse_features & RTE_BIT64(VDUSE_F_SUSPEND))) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "Unnegotiated suspend message");
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+ if (!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)) {
+ VHOST_CONFIG_LOG(dev->ifname, ERR,
+ "Unexpected suspend message with no DRIVER_OK");
+ resp.result = VDUSE_REQ_RESULT_FAILED;
+ break;
+ }
+ for (i = 0; dev->notify_ops->vring_state_changed &&
+ i < dev->nr_vring; i++) {
+ if (dev->virtqueue[i] == dev->cvq)
+ continue;
+
+ dev->notify_ops->vring_state_changed(dev->vid, i, false);
+ }
+ dev->vduse_suspended = true;
+ resp.result = VDUSE_REQ_RESULT_OK;
+ break;
+
default:
resp.result = VDUSE_REQ_RESULT_FAILED;
break;
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index ce2e1a271c6f..73f0c9397cd0 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -534,6 +534,7 @@ struct __rte_cache_aligned virtio_net {
int vduse_dev_fd;
uint64_t vduse_api_ver;
uint64_t vduse_features;
+ bool vduse_suspended;
struct vhost_virtqueue *cvq;
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2026-02-11 16:16 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-11 16:14 [RFC 00/10] Add vduse live migration features Eugenio Pérez
2026-02-11 16:14 ` [RFC 01/10] uapi: align VDUSE header for ASID Eugenio Pérez
2026-02-11 16:14 ` [RFC 02/10] vhost: introduce ASID support Eugenio Pérez
2026-02-11 16:14 ` [RFC 03/10] vhost: add VDUSE API version negotiation Eugenio Pérez
2026-02-11 16:14 ` [RFC 04/10] vhost: add virtqueues groups support to VDUSE Eugenio Pérez
2026-02-11 16:14 ` [RFC 05/10] vhost: add ASID support to VDUSE IOTLB operations Eugenio Pérez
2026-02-11 16:14 ` [RFC 06/10] vhost: claim VDUSE support for API version 1 Eugenio Pérez
2026-02-11 16:14 ` [RFC 07/10] vhost: add net status feature to VDUSE Eugenio Pérez
2026-02-11 16:14 ` [RFC 08/10] uapi: Align vduse.h for enable and suspend VDUSE messages Eugenio Pérez
2026-02-11 16:14 ` [RFC 09/10] vhost: Support VDUSE QUEUE_READY feature Eugenio Pérez
2026-02-11 16:14 ` [RFC 10/10] vhost: Support vduse suspend feature Eugenio Pérez
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox