public inbox for virtualization@lists.linux-foundation.org
 help / color / mirror / Atom feed
* [PATCH] vduse: Add suspend
@ 2026-02-11 12:01 Eugenio Pérez
  2026-03-03  7:26 ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Eugenio Pérez @ 2026-02-11 12:01 UTC (permalink / raw)
  To: Michael S . Tsirkin
  Cc: Yongji Xie, virtualization, linux-kernel, Eugenio Pérez,
	Laurent Vivier, Stefano Garzarella, Cindy Lu, Xuan Zhuo,
	Jason Wang, Maxime Coquelin

Implement suspend operation for vduse devices, so vhost-vdpa will offer
that backend feature and userspace can effectively suspend the device.

This is a must before get virtqueue indexes (base) for live migration,
since the device could modify them after userland gets them.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
This series depends on
https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
 include/uapi/linux/vduse.h         |  4 ++
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 59d9c4718d86..bdcc114e2710 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -54,7 +54,8 @@
 #define IRQ_UNBOUND -1
 
 /* Supported VDUSE features */
-static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
+static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
+				       BIT_U64(VDUSE_F_SUSPEND);
 
 /*
  * VDUSE instance have not asked the vduse API version, so assume 0.
@@ -85,6 +86,7 @@ struct vduse_virtqueue {
 	int irq_effective_cpu;
 	struct cpumask irq_affinity;
 	struct kobject kobj;
+	struct vduse_dev *dev;
 };
 
 struct vduse_dev;
@@ -134,6 +136,7 @@ struct vduse_dev {
 	int minor;
 	bool broken;
 	bool connected;
+	bool suspended;
 	u64 api_version;
 	u64 device_features;
 	u64 driver_features;
@@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
 
 	down_write(&dev->rwsem);
 
+	dev->suspended = false;
 	dev->status = 0;
 	dev->driver_features = 0;
 	dev->generation++;
@@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
 	struct vduse_virtqueue *vq = dev->vqs[idx];
 
+	guard(rwsem_read)(&vq->dev->rwsem);
+	if (vq->dev->suspended)
+		return;
+
 	if (!eventfd_signal_allowed()) {
 		schedule_work(&vq->kick);
 		return;
@@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
 	return 0;
 }
 
+static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_dev_msg msg = { 0 };
+	int ret;
+
+	msg.req.type = VDUSE_SUSPEND;
+
+	ret = vduse_dev_msg_sync(dev, &msg);
+	if (ret == 0) {
+		scoped_guard(rwsem_write, &dev->rwsem)
+			dev->suspended = true;
+
+		cancel_work_sync(&dev->inject);
+		for (u32 i = 0; i < dev->vq_num; i++)
+			cancel_work_sync(&dev->vqs[i]->inject);
+	}
+
+	return ret;
+}
+
 static void vduse_vdpa_free(struct vdpa_device *vdpa)
 {
 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
@@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
 	.free			= vduse_vdpa_free,
 };
 
+static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
+	.set_vq_address		= vduse_vdpa_set_vq_address,
+	.kick_vq		= vduse_vdpa_kick_vq,
+	.set_vq_cb		= vduse_vdpa_set_vq_cb,
+	.set_vq_num             = vduse_vdpa_set_vq_num,
+	.get_vq_size		= vduse_vdpa_get_vq_size,
+	.get_vq_group		= vduse_get_vq_group,
+	.set_vq_ready		= vduse_vdpa_set_vq_ready,
+	.get_vq_ready		= vduse_vdpa_get_vq_ready,
+	.set_vq_state		= vduse_vdpa_set_vq_state,
+	.get_vq_state		= vduse_vdpa_get_vq_state,
+	.get_vq_align		= vduse_vdpa_get_vq_align,
+	.get_device_features	= vduse_vdpa_get_device_features,
+	.set_driver_features	= vduse_vdpa_set_driver_features,
+	.get_driver_features	= vduse_vdpa_get_driver_features,
+	.set_config_cb		= vduse_vdpa_set_config_cb,
+	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
+	.get_device_id		= vduse_vdpa_get_device_id,
+	.get_vendor_id		= vduse_vdpa_get_vendor_id,
+	.get_status		= vduse_vdpa_get_status,
+	.set_status		= vduse_vdpa_set_status,
+	.get_config_size	= vduse_vdpa_get_config_size,
+	.get_config		= vduse_vdpa_get_config,
+	.set_config		= vduse_vdpa_set_config,
+	.get_generation		= vduse_vdpa_get_generation,
+	.set_vq_affinity	= vduse_vdpa_set_vq_affinity,
+	.get_vq_affinity	= vduse_vdpa_get_vq_affinity,
+	.reset			= vduse_vdpa_reset,
+	.set_map		= vduse_vdpa_set_map,
+	.set_group_asid		= vduse_set_group_asid,
+	.get_vq_map		= vduse_get_vq_map,
+	.suspend		= vduse_vdpa_suspend,
+	.free			= vduse_vdpa_free,
+};
+
 static void vduse_dev_sync_single_for_device(union virtio_map token,
 					     dma_addr_t dma_addr, size_t size,
 					     enum dma_data_direction dir)
@@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
 {
 	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
 
+	guard(rwsem_read)(&dev->rwsem);
+	if (dev->suspended)
+		return;
+
 	spin_lock_bh(&dev->irq_lock);
 	if (dev->config_cb.callback)
 		dev->config_cb.callback(dev->config_cb.private);
@@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
 	struct vduse_virtqueue *vq = container_of(work,
 					struct vduse_virtqueue, inject);
 
+	guard(rwsem_read)(&vq->dev->rwsem);
+	if (vq->dev->suspended)
+		return;
+
 	spin_lock_bh(&vq->irq_lock);
 	if (vq->ready && vq->cb.callback)
 		vq->cb.callback(vq->cb.private);
@@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
 	int ret = -EINVAL;
 
 	down_read(&dev->rwsem);
+	if (dev->suspended)
+		return ret;
+
 	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
 		goto unlock;
 
@@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
 		}
 
 		dev->vqs[i]->index = i;
+		dev->vqs[i]->dev = dev;
 		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
@@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
 {
 	struct vduse_vdpa *vdev;
+	const struct vdpa_config_ops *ops;
 
 	if (dev->vdev)
 		return -EEXIST;
 
+	if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
+		ops = &vduse_vdpa_config_ops_with_suspend;
+	else
+		ops = &vduse_vdpa_config_ops;
+
 	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
-				 &vduse_vdpa_config_ops, &vduse_map_ops,
+				 ops, &vduse_map_ops,
 				 dev->ngroups, dev->nas, name, true);
 	if (IS_ERR(vdev))
 		return PTR_ERR(vdev);
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
index d39734cef6d3..95b93bc6bac5 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -21,6 +21,9 @@
 /* The VDUSE instance expects a request for vq ready */
 #define VDUSE_F_QUEUE_READY	0
 
+/* The VDUSE instance expects a request for suspend */
+#define VDUSE_F_SUSPEND		1
+
 /*
  * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
  * This is used for future extension.
@@ -338,6 +341,7 @@ enum vduse_req_type {
 	VDUSE_UPDATE_IOTLB,
 	VDUSE_SET_VQ_GROUP_ASID,
 	VDUSE_SET_VQ_READY,
+	VDUSE_SUSPEND,
 };
 
 /**
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] vduse: Add suspend
  2026-02-11 12:01 [PATCH] vduse: Add suspend Eugenio Pérez
@ 2026-03-03  7:26 ` Jason Wang
  2026-03-03 10:51   ` Eugenio Perez Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2026-03-03  7:26 UTC (permalink / raw)
  To: Eugenio Pérez
  Cc: Michael S . Tsirkin, Yongji Xie, virtualization, linux-kernel,
	Laurent Vivier, Stefano Garzarella, Cindy Lu, Xuan Zhuo,
	Maxime Coquelin

On Wed, Feb 11, 2026 at 8:02 PM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> Implement suspend operation for vduse devices, so vhost-vdpa will offer
> that backend feature and userspace can effectively suspend the device.
>
> This is a must before get virtqueue indexes (base) for live migration,
> since the device could modify them after userland gets them.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> This series depends on
> https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
>  include/uapi/linux/vduse.h         |  4 ++
>  2 files changed, 88 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 59d9c4718d86..bdcc114e2710 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -54,7 +54,8 @@
>  #define IRQ_UNBOUND -1
>
>  /* Supported VDUSE features */
> -static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
> +static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
> +                                      BIT_U64(VDUSE_F_SUSPEND);
>
>  /*
>   * VDUSE instance have not asked the vduse API version, so assume 0.
> @@ -85,6 +86,7 @@ struct vduse_virtqueue {
>         int irq_effective_cpu;
>         struct cpumask irq_affinity;
>         struct kobject kobj;
> +       struct vduse_dev *dev;
>  };
>
>  struct vduse_dev;
> @@ -134,6 +136,7 @@ struct vduse_dev {
>         int minor;
>         bool broken;
>         bool connected;
> +       bool suspended;
>         u64 api_version;
>         u64 device_features;
>         u64 driver_features;
> @@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
>
>         down_write(&dev->rwsem);
>
> +       dev->suspended = false;
>         dev->status = 0;
>         dev->driver_features = 0;
>         dev->generation++;
> @@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
>         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
>         struct vduse_virtqueue *vq = dev->vqs[idx];
>
> +       guard(rwsem_read)(&vq->dev->rwsem);
> +       if (vq->dev->suspended)
> +               return;
> +
>         if (!eventfd_signal_allowed()) {
>                 schedule_work(&vq->kick);
>                 return;
> @@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
>         return 0;
>  }
>
> +static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
> +{
> +       struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> +       struct vduse_dev_msg msg = { 0 };
> +       int ret;
> +
> +       msg.req.type = VDUSE_SUSPEND;
> +
> +       ret = vduse_dev_msg_sync(dev, &msg);
> +       if (ret == 0) {
> +               scoped_guard(rwsem_write, &dev->rwsem)
> +                       dev->suspended = true;
> +
> +               cancel_work_sync(&dev->inject);
> +               for (u32 i = 0; i < dev->vq_num; i++)
> +                       cancel_work_sync(&dev->vqs[i]->inject);

Should we cancel those before msy sync?

> +       }
> +
> +       return ret;
> +}
> +
>  static void vduse_vdpa_free(struct vdpa_device *vdpa)
>  {
>         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> @@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
>         .free                   = vduse_vdpa_free,
>  };
>
> +static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
> +       .set_vq_address         = vduse_vdpa_set_vq_address,
> +       .kick_vq                = vduse_vdpa_kick_vq,
> +       .set_vq_cb              = vduse_vdpa_set_vq_cb,
> +       .set_vq_num             = vduse_vdpa_set_vq_num,
> +       .get_vq_size            = vduse_vdpa_get_vq_size,
> +       .get_vq_group           = vduse_get_vq_group,
> +       .set_vq_ready           = vduse_vdpa_set_vq_ready,
> +       .get_vq_ready           = vduse_vdpa_get_vq_ready,
> +       .set_vq_state           = vduse_vdpa_set_vq_state,
> +       .get_vq_state           = vduse_vdpa_get_vq_state,
> +       .get_vq_align           = vduse_vdpa_get_vq_align,
> +       .get_device_features    = vduse_vdpa_get_device_features,
> +       .set_driver_features    = vduse_vdpa_set_driver_features,
> +       .get_driver_features    = vduse_vdpa_get_driver_features,
> +       .set_config_cb          = vduse_vdpa_set_config_cb,
> +       .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
> +       .get_device_id          = vduse_vdpa_get_device_id,
> +       .get_vendor_id          = vduse_vdpa_get_vendor_id,
> +       .get_status             = vduse_vdpa_get_status,
> +       .set_status             = vduse_vdpa_set_status,
> +       .get_config_size        = vduse_vdpa_get_config_size,
> +       .get_config             = vduse_vdpa_get_config,
> +       .set_config             = vduse_vdpa_set_config,
> +       .get_generation         = vduse_vdpa_get_generation,
> +       .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
> +       .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
> +       .reset                  = vduse_vdpa_reset,
> +       .set_map                = vduse_vdpa_set_map,
> +       .set_group_asid         = vduse_set_group_asid,
> +       .get_vq_map             = vduse_get_vq_map,
> +       .suspend                = vduse_vdpa_suspend,

We can have a

if (suspend_is_supported)
        ...

to avoid having a new set of config ops?

> +       .free                   = vduse_vdpa_free,
> +};
> +
>  static void vduse_dev_sync_single_for_device(union virtio_map token,
>                                              dma_addr_t dma_addr, size_t size,
>                                              enum dma_data_direction dir)
> @@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
>  {
>         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
>
> +       guard(rwsem_read)(&dev->rwsem);
> +       if (dev->suspended)
> +               return;

Device can still modify memory, so I wonder how much this can help.

> +
>         spin_lock_bh(&dev->irq_lock);
>         if (dev->config_cb.callback)
>                 dev->config_cb.callback(dev->config_cb.private);
> @@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
>         struct vduse_virtqueue *vq = container_of(work,
>                                         struct vduse_virtqueue, inject);
>
> +       guard(rwsem_read)(&vq->dev->rwsem);
> +       if (vq->dev->suspended)
> +               return;
> +
>         spin_lock_bh(&vq->irq_lock);
>         if (vq->ready && vq->cb.callback)
>                 vq->cb.callback(vq->cb.private);
> @@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
>         int ret = -EINVAL;
>
>         down_read(&dev->rwsem);
> +       if (dev->suspended)
> +               return ret;
> +
>         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
>                 goto unlock;
>
> @@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
>                 }
>
>                 dev->vqs[i]->index = i;
> +               dev->vqs[i]->dev = dev;
>                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
>                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
>                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> @@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
>  static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
>  {
>         struct vduse_vdpa *vdev;
> +       const struct vdpa_config_ops *ops;
>
>         if (dev->vdev)
>                 return -EEXIST;
>
> +       if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
> +               ops = &vduse_vdpa_config_ops_with_suspend;
> +       else
> +               ops = &vduse_vdpa_config_ops;
> +
>         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
> -                                &vduse_vdpa_config_ops, &vduse_map_ops,
> +                                ops, &vduse_map_ops,
>                                  dev->ngroups, dev->nas, name, true);
>         if (IS_ERR(vdev))
>                 return PTR_ERR(vdev);
> diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
> index d39734cef6d3..95b93bc6bac5 100644
> --- a/include/uapi/linux/vduse.h
> +++ b/include/uapi/linux/vduse.h
> @@ -21,6 +21,9 @@
>  /* The VDUSE instance expects a request for vq ready */
>  #define VDUSE_F_QUEUE_READY    0
>
> +/* The VDUSE instance expects a request for suspend */
> +#define VDUSE_F_SUSPEND                1
> +
>  /*
>   * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
>   * This is used for future extension.
> @@ -338,6 +341,7 @@ enum vduse_req_type {
>         VDUSE_UPDATE_IOTLB,
>         VDUSE_SET_VQ_GROUP_ASID,
>         VDUSE_SET_VQ_READY,
> +       VDUSE_SUSPEND,
>  };
>
>  /**
> --
> 2.53.0
>

Thanks


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vduse: Add suspend
  2026-03-03  7:26 ` Jason Wang
@ 2026-03-03 10:51   ` Eugenio Perez Martin
  2026-03-04  5:52     ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Eugenio Perez Martin @ 2026-03-03 10:51 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S . Tsirkin, Yongji Xie, virtualization, linux-kernel,
	Laurent Vivier, Stefano Garzarella, Cindy Lu, Xuan Zhuo,
	Maxime Coquelin

On Tue, Mar 3, 2026 at 8:26 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Feb 11, 2026 at 8:02 PM Eugenio Pérez <eperezma@redhat.com> wrote:
> >
> > Implement suspend operation for vduse devices, so vhost-vdpa will offer
> > that backend feature and userspace can effectively suspend the device.
> >
> > This is a must before get virtqueue indexes (base) for live migration,
> > since the device could modify them after userland gets them.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > This series depends on
> > https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
> > ---
> >  drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
> >  include/uapi/linux/vduse.h         |  4 ++
> >  2 files changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index 59d9c4718d86..bdcc114e2710 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -54,7 +54,8 @@
> >  #define IRQ_UNBOUND -1
> >
> >  /* Supported VDUSE features */
> > -static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
> > +static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
> > +                                      BIT_U64(VDUSE_F_SUSPEND);
> >
> >  /*
> >   * VDUSE instance have not asked the vduse API version, so assume 0.
> > @@ -85,6 +86,7 @@ struct vduse_virtqueue {
> >         int irq_effective_cpu;
> >         struct cpumask irq_affinity;
> >         struct kobject kobj;
> > +       struct vduse_dev *dev;
> >  };
> >
> >  struct vduse_dev;
> > @@ -134,6 +136,7 @@ struct vduse_dev {
> >         int minor;
> >         bool broken;
> >         bool connected;
> > +       bool suspended;
> >         u64 api_version;
> >         u64 device_features;
> >         u64 driver_features;
> > @@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> >
> >         down_write(&dev->rwsem);
> >
> > +       dev->suspended = false;
> >         dev->status = 0;
> >         dev->driver_features = 0;
> >         dev->generation++;
> > @@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
> >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> >         struct vduse_virtqueue *vq = dev->vqs[idx];
> >
> > +       guard(rwsem_read)(&vq->dev->rwsem);
> > +       if (vq->dev->suspended)
> > +               return;
> > +
> >         if (!eventfd_signal_allowed()) {
> >                 schedule_work(&vq->kick);
> >                 return;
> > @@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> >         return 0;
> >  }
> >
> > +static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
> > +{
> > +       struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > +       struct vduse_dev_msg msg = { 0 };
> > +       int ret;
> > +
> > +       msg.req.type = VDUSE_SUSPEND;
> > +
> > +       ret = vduse_dev_msg_sync(dev, &msg);
> > +       if (ret == 0) {
> > +               scoped_guard(rwsem_write, &dev->rwsem)
> > +                       dev->suspended = true;
> > +
> > +               cancel_work_sync(&dev->inject);
> > +               for (u32 i = 0; i < dev->vq_num; i++)
> > +                       cancel_work_sync(&dev->vqs[i]->inject);
>
> Should we cancel those before msy sync?
>

The only condition is that the driver must not see interrupts from the
moment the ioctl returns.

Also, disabling it before the msg returns would add complexity because
it needs to be reenabled & call the driver if it falis. Or am I
missing something it adds?

TBH this is more of a hardening than something the VDUSE driver should
do. We could even move to the vdpa core.

> > +       }
> > +
> > +       return ret;
> > +}
> > +
> >  static void vduse_vdpa_free(struct vdpa_device *vdpa)
> >  {
> >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > @@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> >         .free                   = vduse_vdpa_free,
> >  };
> >
> > +static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
> > +       .set_vq_address         = vduse_vdpa_set_vq_address,
> > +       .kick_vq                = vduse_vdpa_kick_vq,
> > +       .set_vq_cb              = vduse_vdpa_set_vq_cb,
> > +       .set_vq_num             = vduse_vdpa_set_vq_num,
> > +       .get_vq_size            = vduse_vdpa_get_vq_size,
> > +       .get_vq_group           = vduse_get_vq_group,
> > +       .set_vq_ready           = vduse_vdpa_set_vq_ready,
> > +       .get_vq_ready           = vduse_vdpa_get_vq_ready,
> > +       .set_vq_state           = vduse_vdpa_set_vq_state,
> > +       .get_vq_state           = vduse_vdpa_get_vq_state,
> > +       .get_vq_align           = vduse_vdpa_get_vq_align,
> > +       .get_device_features    = vduse_vdpa_get_device_features,
> > +       .set_driver_features    = vduse_vdpa_set_driver_features,
> > +       .get_driver_features    = vduse_vdpa_get_driver_features,
> > +       .set_config_cb          = vduse_vdpa_set_config_cb,
> > +       .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
> > +       .get_device_id          = vduse_vdpa_get_device_id,
> > +       .get_vendor_id          = vduse_vdpa_get_vendor_id,
> > +       .get_status             = vduse_vdpa_get_status,
> > +       .set_status             = vduse_vdpa_set_status,
> > +       .get_config_size        = vduse_vdpa_get_config_size,
> > +       .get_config             = vduse_vdpa_get_config,
> > +       .set_config             = vduse_vdpa_set_config,
> > +       .get_generation         = vduse_vdpa_get_generation,
> > +       .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
> > +       .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
> > +       .reset                  = vduse_vdpa_reset,
> > +       .set_map                = vduse_vdpa_set_map,
> > +       .set_group_asid         = vduse_set_group_asid,
> > +       .get_vq_map             = vduse_get_vq_map,
> > +       .suspend                = vduse_vdpa_suspend,
>
> We can have a
>
> if (suspend_is_supported)
>         ...
>
> to avoid having a new set of config ops?
>

It complicates the code of vhost_vdpa in my opinion.

Currently, it is:
if (ops->suspend)

And that would move to something like:
if (ops->suspend) {
  if (ops->suspend_is_supported && !ops->suspend_is_supported(dev)) {
    return; // CANNOT SUSPEND
  }

  ops->suspend(dev)
}

Another option I considered is to follow mlx5's approach and have one
set of ops per device. But I think duplicating the vdpa_ops in the
data section is the cleanest approach. Let me know if you think we
should go with suspend_is_supported callback instead.

> > +       .free                   = vduse_vdpa_free,
> > +};
> > +
> >  static void vduse_dev_sync_single_for_device(union virtio_map token,
> >                                              dma_addr_t dma_addr, size_t size,
> >                                              enum dma_data_direction dir)
> > @@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
> >  {
> >         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
> >
> > +       guard(rwsem_read)(&dev->rwsem);
> > +       if (dev->suspended)
> > +               return;
>
> Device can still modify memory, so I wonder how much this can help.
>

Do you prefer me to remove this guard?

> > +
> >         spin_lock_bh(&dev->irq_lock);
> >         if (dev->config_cb.callback)
> >                 dev->config_cb.callback(dev->config_cb.private);
> > @@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> >         struct vduse_virtqueue *vq = container_of(work,
> >                                         struct vduse_virtqueue, inject);
> >
> > +       guard(rwsem_read)(&vq->dev->rwsem);
> > +       if (vq->dev->suspended)
> > +               return;
> > +
> >         spin_lock_bh(&vq->irq_lock);
> >         if (vq->ready && vq->cb.callback)
> >                 vq->cb.callback(vq->cb.private);
> > @@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> >         int ret = -EINVAL;
> >
> >         down_read(&dev->rwsem);
> > +       if (dev->suspended)
> > +               return ret;
> > +
> >         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> >                 goto unlock;
> >
> > @@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> >                 }
> >
> >                 dev->vqs[i]->index = i;
> > +               dev->vqs[i]->dev = dev;
> >                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
> >                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
> >                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> > @@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
> >  static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
> >  {
> >         struct vduse_vdpa *vdev;
> > +       const struct vdpa_config_ops *ops;
> >
> >         if (dev->vdev)
> >                 return -EEXIST;
> >
> > +       if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
> > +               ops = &vduse_vdpa_config_ops_with_suspend;
> > +       else
> > +               ops = &vduse_vdpa_config_ops;
> > +
> >         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
> > -                                &vduse_vdpa_config_ops, &vduse_map_ops,
> > +                                ops, &vduse_map_ops,
> >                                  dev->ngroups, dev->nas, name, true);
> >         if (IS_ERR(vdev))
> >                 return PTR_ERR(vdev);
> > diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
> > index d39734cef6d3..95b93bc6bac5 100644
> > --- a/include/uapi/linux/vduse.h
> > +++ b/include/uapi/linux/vduse.h
> > @@ -21,6 +21,9 @@
> >  /* The VDUSE instance expects a request for vq ready */
> >  #define VDUSE_F_QUEUE_READY    0
> >
> > +/* The VDUSE instance expects a request for suspend */
> > +#define VDUSE_F_SUSPEND                1
> > +
> >  /*
> >   * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
> >   * This is used for future extension.
> > @@ -338,6 +341,7 @@ enum vduse_req_type {
> >         VDUSE_UPDATE_IOTLB,
> >         VDUSE_SET_VQ_GROUP_ASID,
> >         VDUSE_SET_VQ_READY,
> > +       VDUSE_SUSPEND,
> >  };
> >
> >  /**
> > --
> > 2.53.0
> >
>
> Thanks
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vduse: Add suspend
  2026-03-03 10:51   ` Eugenio Perez Martin
@ 2026-03-04  5:52     ` Jason Wang
  2026-03-04  6:21       ` Eugenio Perez Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2026-03-04  5:52 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: Michael S . Tsirkin, Yongji Xie, virtualization, linux-kernel,
	Laurent Vivier, Stefano Garzarella, Cindy Lu, Xuan Zhuo,
	Maxime Coquelin

On Tue, Mar 3, 2026 at 6:51 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Tue, Mar 3, 2026 at 8:26 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Feb 11, 2026 at 8:02 PM Eugenio Pérez <eperezma@redhat.com> wrote:
> > >
> > > Implement suspend operation for vduse devices, so vhost-vdpa will offer
> > > that backend feature and userspace can effectively suspend the device.
> > >
> > > This is a must before get virtqueue indexes (base) for live migration,
> > > since the device could modify them after userland gets them.
> > >
> > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > ---
> > > This series depends on
> > > https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
> > > ---
> > >  drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
> > >  include/uapi/linux/vduse.h         |  4 ++
> > >  2 files changed, 88 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > index 59d9c4718d86..bdcc114e2710 100644
> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > @@ -54,7 +54,8 @@
> > >  #define IRQ_UNBOUND -1
> > >
> > >  /* Supported VDUSE features */
> > > -static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
> > > +static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
> > > +                                      BIT_U64(VDUSE_F_SUSPEND);
> > >
> > >  /*
> > >   * VDUSE instance have not asked the vduse API version, so assume 0.
> > > @@ -85,6 +86,7 @@ struct vduse_virtqueue {
> > >         int irq_effective_cpu;
> > >         struct cpumask irq_affinity;
> > >         struct kobject kobj;
> > > +       struct vduse_dev *dev;
> > >  };
> > >
> > >  struct vduse_dev;
> > > @@ -134,6 +136,7 @@ struct vduse_dev {
> > >         int minor;
> > >         bool broken;
> > >         bool connected;
> > > +       bool suspended;
> > >         u64 api_version;
> > >         u64 device_features;
> > >         u64 driver_features;
> > > @@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > >
> > >         down_write(&dev->rwsem);
> > >
> > > +       dev->suspended = false;
> > >         dev->status = 0;
> > >         dev->driver_features = 0;
> > >         dev->generation++;
> > > @@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
> > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > >         struct vduse_virtqueue *vq = dev->vqs[idx];
> > >
> > > +       guard(rwsem_read)(&vq->dev->rwsem);
> > > +       if (vq->dev->suspended)
> > > +               return;
> > > +
> > >         if (!eventfd_signal_allowed()) {
> > >                 schedule_work(&vq->kick);
> > >                 return;
> > > @@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> > >         return 0;
> > >  }
> > >
> > > +static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
> > > +{
> > > +       struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > > +       struct vduse_dev_msg msg = { 0 };
> > > +       int ret;
> > > +
> > > +       msg.req.type = VDUSE_SUSPEND;
> > > +
> > > +       ret = vduse_dev_msg_sync(dev, &msg);
> > > +       if (ret == 0) {
> > > +               scoped_guard(rwsem_write, &dev->rwsem)
> > > +                       dev->suspended = true;
> > > +
> > > +               cancel_work_sync(&dev->inject);
> > > +               for (u32 i = 0; i < dev->vq_num; i++)
> > > +                       cancel_work_sync(&dev->vqs[i]->inject);
> >
> > Should we cancel those before msy sync?
> >
>
> The only condition is that the driver must not see interrupts from the
> moment the ioctl returns.

I think you meant "vduse_vdpa_suspend returns" here.

>
> Also, disabling it before the msg returns would add complexity because
> it needs to be reenabled & call the driver if it falis. Or am I
> missing something it adds?

I think not but I have another question:

1) I don't see resume, any reason for that
2) assuming we need to support resume, do we need to record pending
interrupt otherwise we may lose interrupt after resuming

>
> TBH this is more of a hardening than something the VDUSE driver should
> do. We could even move to the vdpa core.
>
> > > +       }
> > > +
> > > +       return ret;
> > > +}
> > > +
> > >  static void vduse_vdpa_free(struct vdpa_device *vdpa)
> > >  {
> > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > > @@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> > >         .free                   = vduse_vdpa_free,
> > >  };
> > >
> > > +static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
> > > +       .set_vq_address         = vduse_vdpa_set_vq_address,
> > > +       .kick_vq                = vduse_vdpa_kick_vq,
> > > +       .set_vq_cb              = vduse_vdpa_set_vq_cb,
> > > +       .set_vq_num             = vduse_vdpa_set_vq_num,
> > > +       .get_vq_size            = vduse_vdpa_get_vq_size,
> > > +       .get_vq_group           = vduse_get_vq_group,
> > > +       .set_vq_ready           = vduse_vdpa_set_vq_ready,
> > > +       .get_vq_ready           = vduse_vdpa_get_vq_ready,
> > > +       .set_vq_state           = vduse_vdpa_set_vq_state,
> > > +       .get_vq_state           = vduse_vdpa_get_vq_state,
> > > +       .get_vq_align           = vduse_vdpa_get_vq_align,
> > > +       .get_device_features    = vduse_vdpa_get_device_features,
> > > +       .set_driver_features    = vduse_vdpa_set_driver_features,
> > > +       .get_driver_features    = vduse_vdpa_get_driver_features,
> > > +       .set_config_cb          = vduse_vdpa_set_config_cb,
> > > +       .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
> > > +       .get_device_id          = vduse_vdpa_get_device_id,
> > > +       .get_vendor_id          = vduse_vdpa_get_vendor_id,
> > > +       .get_status             = vduse_vdpa_get_status,
> > > +       .set_status             = vduse_vdpa_set_status,
> > > +       .get_config_size        = vduse_vdpa_get_config_size,
> > > +       .get_config             = vduse_vdpa_get_config,
> > > +       .set_config             = vduse_vdpa_set_config,
> > > +       .get_generation         = vduse_vdpa_get_generation,
> > > +       .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
> > > +       .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
> > > +       .reset                  = vduse_vdpa_reset,
> > > +       .set_map                = vduse_vdpa_set_map,
> > > +       .set_group_asid         = vduse_set_group_asid,
> > > +       .get_vq_map             = vduse_get_vq_map,
> > > +       .suspend                = vduse_vdpa_suspend,
> >
> > We can have a
> >
> > if (suspend_is_supported)
> >         ...
> >
> > to avoid having a new set of config ops?
> >
>
> It complicates the code of vhost_vdpa in my opinion.
>
> Currently, it is:
> if (ops->suspend)

Okay, I see the problem which is:

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
        struct vdpa_device *vdpa = v->vdpa;
        const struct vdpa_config_ops *ops = vdpa->config;

        return ops->suspend;
}

>
> And that would move to something like:
> if (ops->suspend) {
>   if (ops->suspend_is_supported && !ops->suspend_is_supported(dev)) {
>     return; // CANNOT SUSPEND
>   }
>
>   ops->suspend(dev)
> }
>
> Another option I considered is to follow mlx5's approach and have one
> set of ops per device. But I think duplicating the vdpa_ops in the
> data section is the cleanest approach. Let me know if you think we
> should go with suspend_is_supported callback instead.

I think there's no need then.

>
> > > +       .free                   = vduse_vdpa_free,
> > > +};
> > > +
> > >  static void vduse_dev_sync_single_for_device(union virtio_map token,
> > >                                              dma_addr_t dma_addr, size_t size,
> > >                                              enum dma_data_direction dir)
> > > @@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
> > >  {
> > >         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
> > >
> > > +       guard(rwsem_read)(&dev->rwsem);
> > > +       if (dev->suspended)
> > > +               return;
> >
> > Device can still modify memory, so I wonder how much this can help.
> >
>
> Do you prefer me to remove this guard?

Nope, I misread the code, it should be fine.

Thanks

>
> > > +
> > >         spin_lock_bh(&dev->irq_lock);
> > >         if (dev->config_cb.callback)
> > >                 dev->config_cb.callback(dev->config_cb.private);
> > > @@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > >         struct vduse_virtqueue *vq = container_of(work,
> > >                                         struct vduse_virtqueue, inject);
> > >
> > > +       guard(rwsem_read)(&vq->dev->rwsem);
> > > +       if (vq->dev->suspended)
> > > +               return;
> > > +
> > >         spin_lock_bh(&vq->irq_lock);
> > >         if (vq->ready && vq->cb.callback)
> > >                 vq->cb.callback(vq->cb.private);
> > > @@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > >         int ret = -EINVAL;
> > >
> > >         down_read(&dev->rwsem);
> > > +       if (dev->suspended)
> > > +               return ret;
> > > +
> > >         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> > >                 goto unlock;
> > >
> > > @@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> > >                 }
> > >
> > >                 dev->vqs[i]->index = i;
> > > +               dev->vqs[i]->dev = dev;
> > >                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
> > >                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
> > >                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> > > @@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
> > >  static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
> > >  {
> > >         struct vduse_vdpa *vdev;
> > > +       const struct vdpa_config_ops *ops;
> > >
> > >         if (dev->vdev)
> > >                 return -EEXIST;
> > >
> > > +       if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
> > > +               ops = &vduse_vdpa_config_ops_with_suspend;
> > > +       else
> > > +               ops = &vduse_vdpa_config_ops;
> > > +
> > >         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
> > > -                                &vduse_vdpa_config_ops, &vduse_map_ops,
> > > +                                ops, &vduse_map_ops,
> > >                                  dev->ngroups, dev->nas, name, true);
> > >         if (IS_ERR(vdev))
> > >                 return PTR_ERR(vdev);
> > > diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
> > > index d39734cef6d3..95b93bc6bac5 100644
> > > --- a/include/uapi/linux/vduse.h
> > > +++ b/include/uapi/linux/vduse.h
> > > @@ -21,6 +21,9 @@
> > >  /* The VDUSE instance expects a request for vq ready */
> > >  #define VDUSE_F_QUEUE_READY    0
> > >
> > > +/* The VDUSE instance expects a request for suspend */
> > > +#define VDUSE_F_SUSPEND                1
> > > +
> > >  /*
> > >   * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
> > >   * This is used for future extension.
> > > @@ -338,6 +341,7 @@ enum vduse_req_type {
> > >         VDUSE_UPDATE_IOTLB,
> > >         VDUSE_SET_VQ_GROUP_ASID,
> > >         VDUSE_SET_VQ_READY,
> > > +       VDUSE_SUSPEND,
> > >  };
> > >
> > >  /**
> > > --
> > > 2.53.0
> > >
> >
> > Thanks
> >
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] vduse: Add suspend
  2026-03-04  5:52     ` Jason Wang
@ 2026-03-04  6:21       ` Eugenio Perez Martin
  0 siblings, 0 replies; 5+ messages in thread
From: Eugenio Perez Martin @ 2026-03-04  6:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael S . Tsirkin, Yongji Xie, virtualization, linux-kernel,
	Laurent Vivier, Stefano Garzarella, Cindy Lu, Xuan Zhuo,
	Maxime Coquelin

On Wed, Mar 4, 2026 at 6:52 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Tue, Mar 3, 2026 at 6:51 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Tue, Mar 3, 2026 at 8:26 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Wed, Feb 11, 2026 at 8:02 PM Eugenio Pérez <eperezma@redhat.com> wrote:
> > > >
> > > > Implement suspend operation for vduse devices, so vhost-vdpa will offer
> > > > that backend feature and userspace can effectively suspend the device.
> > > >
> > > > This is a must before get virtqueue indexes (base) for live migration,
> > > > since the device could modify them after userland gets them.
> > > >
> > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > ---
> > > > This series depends on
> > > > https://lore.kernel.org/lkml/20260210082554.1582553-1-eperezma@redhat.com
> > > > ---
> > > >  drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
> > > >  include/uapi/linux/vduse.h         |  4 ++
> > > >  2 files changed, 88 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > index 59d9c4718d86..bdcc114e2710 100644
> > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > @@ -54,7 +54,8 @@
> > > >  #define IRQ_UNBOUND -1
> > > >
> > > >  /* Supported VDUSE features */
> > > > -static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
> > > > +static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
> > > > +                                      BIT_U64(VDUSE_F_SUSPEND);
> > > >
> > > >  /*
> > > >   * VDUSE instance have not asked the vduse API version, so assume 0.
> > > > @@ -85,6 +86,7 @@ struct vduse_virtqueue {
> > > >         int irq_effective_cpu;
> > > >         struct cpumask irq_affinity;
> > > >         struct kobject kobj;
> > > > +       struct vduse_dev *dev;
> > > >  };
> > > >
> > > >  struct vduse_dev;
> > > > @@ -134,6 +136,7 @@ struct vduse_dev {
> > > >         int minor;
> > > >         bool broken;
> > > >         bool connected;
> > > > +       bool suspended;
> > > >         u64 api_version;
> > > >         u64 device_features;
> > > >         u64 driver_features;
> > > > @@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > > >
> > > >         down_write(&dev->rwsem);
> > > >
> > > > +       dev->suspended = false;
> > > >         dev->status = 0;
> > > >         dev->driver_features = 0;
> > > >         dev->generation++;
> > > > @@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
> > > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > > >         struct vduse_virtqueue *vq = dev->vqs[idx];
> > > >
> > > > +       guard(rwsem_read)(&vq->dev->rwsem);
> > > > +       if (vq->dev->suspended)
> > > > +               return;
> > > > +
> > > >         if (!eventfd_signal_allowed()) {
> > > >                 schedule_work(&vq->kick);
> > > >                 return;
> > > > @@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> > > >         return 0;
> > > >  }
> > > >
> > > > +static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
> > > > +{
> > > > +       struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > > > +       struct vduse_dev_msg msg = { 0 };
> > > > +       int ret;
> > > > +
> > > > +       msg.req.type = VDUSE_SUSPEND;
> > > > +
> > > > +       ret = vduse_dev_msg_sync(dev, &msg);
> > > > +       if (ret == 0) {
> > > > +               scoped_guard(rwsem_write, &dev->rwsem)
> > > > +                       dev->suspended = true;
> > > > +
> > > > +               cancel_work_sync(&dev->inject);
> > > > +               for (u32 i = 0; i < dev->vq_num; i++)
> > > > +                       cancel_work_sync(&dev->vqs[i]->inject);
> > >
> > > Should we cancel those before msy sync?
> > >
> >
> > The only condition is that the driver must not see interrupts from the
> > moment the ioctl returns.
>
> I think you meant "vduse_vdpa_suspend returns" here.
>
> >
> > Also, disabling it before the msg returns would add complexity because
> > it needs to be reenabled & call the driver if it falis. Or am I
> > missing something it adds?
>
> I think not but I have another question:
>
> 1) I don't see resume, any reason for that

It's not strictly neccesary, and QEMU doesn't even support resume at
the moment. Every time we've implemented suspend / resume in the VDPA
core or parent drivers, it has been done in two different series.

> 2) assuming we need to support resume, do we need to record pending
> interrupt otherwise we may lose interrupt after resuming
>

Sure, I'll handle that when / if I add the resume operation.

> >
> > TBH this is more of a hardening than something the VDUSE driver should
> > do. We could even move to the vdpa core.
> >
> > > > +       }
> > > > +
> > > > +       return ret;
> > > > +}
> > > > +
> > > >  static void vduse_vdpa_free(struct vdpa_device *vdpa)
> > > >  {
> > > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > > > @@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> > > >         .free                   = vduse_vdpa_free,
> > > >  };
> > > >
> > > > +static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
> > > > +       .set_vq_address         = vduse_vdpa_set_vq_address,
> > > > +       .kick_vq                = vduse_vdpa_kick_vq,
> > > > +       .set_vq_cb              = vduse_vdpa_set_vq_cb,
> > > > +       .set_vq_num             = vduse_vdpa_set_vq_num,
> > > > +       .get_vq_size            = vduse_vdpa_get_vq_size,
> > > > +       .get_vq_group           = vduse_get_vq_group,
> > > > +       .set_vq_ready           = vduse_vdpa_set_vq_ready,
> > > > +       .get_vq_ready           = vduse_vdpa_get_vq_ready,
> > > > +       .set_vq_state           = vduse_vdpa_set_vq_state,
> > > > +       .get_vq_state           = vduse_vdpa_get_vq_state,
> > > > +       .get_vq_align           = vduse_vdpa_get_vq_align,
> > > > +       .get_device_features    = vduse_vdpa_get_device_features,
> > > > +       .set_driver_features    = vduse_vdpa_set_driver_features,
> > > > +       .get_driver_features    = vduse_vdpa_get_driver_features,
> > > > +       .set_config_cb          = vduse_vdpa_set_config_cb,
> > > > +       .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
> > > > +       .get_device_id          = vduse_vdpa_get_device_id,
> > > > +       .get_vendor_id          = vduse_vdpa_get_vendor_id,
> > > > +       .get_status             = vduse_vdpa_get_status,
> > > > +       .set_status             = vduse_vdpa_set_status,
> > > > +       .get_config_size        = vduse_vdpa_get_config_size,
> > > > +       .get_config             = vduse_vdpa_get_config,
> > > > +       .set_config             = vduse_vdpa_set_config,
> > > > +       .get_generation         = vduse_vdpa_get_generation,
> > > > +       .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
> > > > +       .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
> > > > +       .reset                  = vduse_vdpa_reset,
> > > > +       .set_map                = vduse_vdpa_set_map,
> > > > +       .set_group_asid         = vduse_set_group_asid,
> > > > +       .get_vq_map             = vduse_get_vq_map,
> > > > +       .suspend                = vduse_vdpa_suspend,
> > >
> > > We can have a
> > >
> > > if (suspend_is_supported)
> > >         ...
> > >
> > > to avoid having a new set of config ops?
> > >
> >
> > It complicates the code of vhost_vdpa in my opinion.
> >
> > Currently, it is:
> > if (ops->suspend)
>
> Okay, I see the problem which is:
>
> static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
> {
>         struct vdpa_device *vdpa = v->vdpa;
>         const struct vdpa_config_ops *ops = vdpa->config;
>
>         return ops->suspend;
> }
>
> >
> > And that would move to something like:
> > if (ops->suspend) {
> >   if (ops->suspend_is_supported && !ops->suspend_is_supported(dev)) {
> >     return; // CANNOT SUSPEND
> >   }
> >
> >   ops->suspend(dev)
> > }
> >
> > Another option I considered is to follow mlx5's approach and have one
> > set of ops per device. But I think duplicating the vdpa_ops in the
> > data section is the cleanest approach. Let me know if you think we
> > should go with suspend_is_supported callback instead.
>
> I think there's no need then.
>
> >
> > > > +       .free                   = vduse_vdpa_free,
> > > > +};
> > > > +
> > > >  static void vduse_dev_sync_single_for_device(union virtio_map token,
> > > >                                              dma_addr_t dma_addr, size_t size,
> > > >                                              enum dma_data_direction dir)
> > > > @@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
> > > >  {
> > > >         struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
> > > >
> > > > +       guard(rwsem_read)(&dev->rwsem);
> > > > +       if (dev->suspended)
> > > > +               return;
> > >
> > > Device can still modify memory, so I wonder how much this can help.
> > >
> >
> > Do you prefer me to remove this guard?
>
> Nope, I misread the code, it should be fine.
>
> Thanks
>
> >
> > > > +
> > > >         spin_lock_bh(&dev->irq_lock);
> > > >         if (dev->config_cb.callback)
> > > >                 dev->config_cb.callback(dev->config_cb.private);
> > > > @@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > > >         struct vduse_virtqueue *vq = container_of(work,
> > > >                                         struct vduse_virtqueue, inject);
> > > >
> > > > +       guard(rwsem_read)(&vq->dev->rwsem);
> > > > +       if (vq->dev->suspended)
> > > > +               return;
> > > > +
> > > >         spin_lock_bh(&vq->irq_lock);
> > > >         if (vq->ready && vq->cb.callback)
> > > >                 vq->cb.callback(vq->cb.private);
> > > > @@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > >         int ret = -EINVAL;
> > > >
> > > >         down_read(&dev->rwsem);
> > > > +       if (dev->suspended)
> > > > +               return ret;
> > > > +
> > > >         if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> > > >                 goto unlock;
> > > >
> > > > @@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> > > >                 }
> > > >
> > > >                 dev->vqs[i]->index = i;
> > > > +               dev->vqs[i]->dev = dev;
> > > >                 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
> > > >                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
> > > >                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> > > > @@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
> > > >  static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
> > > >  {
> > > >         struct vduse_vdpa *vdev;
> > > > +       const struct vdpa_config_ops *ops;
> > > >
> > > >         if (dev->vdev)
> > > >                 return -EEXIST;
> > > >
> > > > +       if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
> > > > +               ops = &vduse_vdpa_config_ops_with_suspend;
> > > > +       else
> > > > +               ops = &vduse_vdpa_config_ops;
> > > > +
> > > >         vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
> > > > -                                &vduse_vdpa_config_ops, &vduse_map_ops,
> > > > +                                ops, &vduse_map_ops,
> > > >                                  dev->ngroups, dev->nas, name, true);
> > > >         if (IS_ERR(vdev))
> > > >                 return PTR_ERR(vdev);
> > > > diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
> > > > index d39734cef6d3..95b93bc6bac5 100644
> > > > --- a/include/uapi/linux/vduse.h
> > > > +++ b/include/uapi/linux/vduse.h
> > > > @@ -21,6 +21,9 @@
> > > >  /* The VDUSE instance expects a request for vq ready */
> > > >  #define VDUSE_F_QUEUE_READY    0
> > > >
> > > > +/* The VDUSE instance expects a request for suspend */
> > > > +#define VDUSE_F_SUSPEND                1
> > > > +
> > > >  /*
> > > >   * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
> > > >   * This is used for future extension.
> > > > @@ -338,6 +341,7 @@ enum vduse_req_type {
> > > >         VDUSE_UPDATE_IOTLB,
> > > >         VDUSE_SET_VQ_GROUP_ASID,
> > > >         VDUSE_SET_VQ_READY,
> > > > +       VDUSE_SUSPEND,
> > > >  };
> > > >
> > > >  /**
> > > > --
> > > > 2.53.0
> > > >
> > >
> > > Thanks
> > >
> >
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-03-04  6:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-11 12:01 [PATCH] vduse: Add suspend Eugenio Pérez
2026-03-03  7:26 ` Jason Wang
2026-03-03 10:51   ` Eugenio Perez Martin
2026-03-04  5:52     ` Jason Wang
2026-03-04  6:21       ` Eugenio Perez Martin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox