[RFC PATCH v2 0/2] Virtio-GPU suspend and resume

dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed

* [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
@ 2025-05-23 22:00 dongwon.kim
  2025-05-23 22:00 ` [RFC PATCH v2 1/2] drm/virtio: Freeze and restore hooks to support " dongwon.kim
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: dongwon.kim @ 2025-05-23 22:00 UTC (permalink / raw)
  To: dri-devel, Dmitry Osipenko; +Cc: Vivek Kasireddy

From: Dongwon Kim <dongwon.kim@intel.com>

This patch series introduces a freeze and restore mechanism for
the virtio-gpu driver:

The first patch adds the `virtgpu_freeze` and `virtgpu_restore` functions.
These functions handle the deletion of the virtio queues before suspension and
their recreation during the restoration process.

The second patch implements a mechanism for restoring `virtio_gpu_object` instances.
This is necessary because the host (QEMU) deletes all associated resources during
the virtio-gpu reset, which occurs as part of the restoration process.

These changes ensure that the virtio-gpu driver can properly handle suspend and
resume scenarios without resource loss.

Dongwon Kim (2):
  drm/virtio: Freeze and restore hooks to support suspend and resume
  drm/virtio: Implement save and restore for virtio_gpu_objects

 drivers/gpu/drm/virtio/virtgpu_drv.c    | 65 +++++++++++++++++++++-
 drivers/gpu/drm/virtio/virtgpu_drv.h    | 11 ++++
 drivers/gpu/drm/virtio/virtgpu_kms.c    | 24 ++++++---
 drivers/gpu/drm/virtio/virtgpu_object.c | 72 +++++++++++++++++++++++++
 4 files changed, 165 insertions(+), 7 deletions(-)

-- 
2.34.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC PATCH v2 1/2] drm/virtio: Freeze and restore hooks to support suspend and resume
  2025-05-23 22:00 [RFC PATCH v2 0/2] Virtio-GPU suspend and resume dongwon.kim
@ 2025-05-23 22:00 ` dongwon.kim
  2025-05-23 22:00 ` [RFC PATCH v2 2/2] drm/virtio: Implement save and restore for virtio_gpu_objects dongwon.kim
  2025-06-14 22:26 ` [RFC PATCH v2 0/2] Virtio-GPU suspend and resume Dmitry Osipenko
  2 siblings, 0 replies; 7+ messages in thread
From: dongwon.kim @ 2025-05-23 22:00 UTC (permalink / raw)
  To: dri-devel, Dmitry Osipenko; +Cc: Vivek Kasireddy

From: Dongwon Kim <dongwon.kim@intel.com>

The driver needs to delete the virtio device's virtqueues before the VM
suspends and then reinitialize all of them upon resume.

v2: A 10ms sleep was added in virtgpu_freeze to avoid a situation where
    the driver locks up during resumption.

Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.c | 59 +++++++++++++++++++++++++++-
 drivers/gpu/drm/virtio/virtgpu_drv.h |  1 +
 drivers/gpu/drm/virtio/virtgpu_kms.c | 23 ++++++++---
 3 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index e32e680c7197..0b17fe18ef4e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -32,6 +32,7 @@
 #include <linux/poll.h>
 #include <linux/vgaarb.h>
 #include <linux/wait.h>
+#include <linux/delay.h>
 
 #include <drm/clients/drm_client_setup.h>
 #include <drm/drm.h>
@@ -163,6 +164,58 @@ static unsigned int features[] = {
 	VIRTIO_GPU_F_RESOURCE_BLOB,
 	VIRTIO_GPU_F_CONTEXT_INIT,
 };
+
+#ifdef CONFIG_PM_SLEEP
+static int virtgpu_freeze(struct virtio_device *vdev)
+{
+	struct drm_device *dev = vdev->priv;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	int error;
+
+	error = drm_mode_config_helper_suspend(dev);
+	if (error) {
+		DRM_ERROR("suspend error %d\n", error);
+		return error;
+	}
+
+	/* TODO: Without this short sleep, virtio-gpu driver often hangs
+	 * during restore while executing drm_mode_config_helper_resume
+	 */
+	usleep_range(10000, 10100);
+
+	flush_work(&vgdev->obj_free_work);
+	flush_work(&vgdev->ctrlq.dequeue_work);
+	flush_work(&vgdev->cursorq.dequeue_work);
+	flush_work(&vgdev->config_changed_work);
+	vdev->config->del_vqs(vdev);
+
+	return 0;
+}
+
+static int virtgpu_restore(struct virtio_device *vdev)
+{
+	struct drm_device *dev = vdev->priv;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	int error;
+
+	error = virtio_gpu_find_vqs(vgdev);
+	if (error) {
+		DRM_ERROR("failed to find virt queues\n");
+		return error;
+	}
+
+	virtio_device_ready(vdev);
+
+	error = drm_mode_config_helper_resume(dev);
+	if (error) {
+		DRM_ERROR("resume error %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+#endif
+
 static struct virtio_driver virtio_gpu_driver = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
@@ -171,7 +224,11 @@ static struct virtio_driver virtio_gpu_driver = {
 	.probe = virtio_gpu_probe,
 	.remove = virtio_gpu_remove,
 	.shutdown = virtio_gpu_shutdown,
-	.config_changed = virtio_gpu_config_changed
+	.config_changed = virtio_gpu_config_changed,
+#ifdef CONFIG_PM_SLEEP
+	.freeze = virtgpu_freeze,
+	.restore = virtgpu_restore,
+#endif
 };
 
 static int __init virtio_gpu_driver_init(void)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index f17660a71a3e..1279f998c8e0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -300,6 +300,7 @@ void virtio_gpu_deinit(struct drm_device *dev);
 void virtio_gpu_release(struct drm_device *dev);
 int virtio_gpu_driver_open(struct drm_device *dev, struct drm_file *file);
 void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file);
+int virtio_gpu_find_vqs(struct virtio_gpu_device *vgdev);
 
 /* virtgpu_gem.c */
 int virtio_gpu_gem_object_open(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 7dfb2006c561..6c1af77ea083 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -114,15 +114,28 @@ static void virtio_gpu_get_capsets(struct virtio_gpu_device *vgdev,
 	vgdev->num_capsets = num_capsets;
 }
 
-int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
+int virtio_gpu_find_vqs(struct virtio_gpu_device *vgdev)
 {
 	struct virtqueue_info vqs_info[] = {
 		{ "control", virtio_gpu_ctrl_ack },
 		{ "cursor", virtio_gpu_cursor_ack },
 	};
-	struct virtio_gpu_device *vgdev;
-	/* this will expand later */
 	struct virtqueue *vqs[2];
+	int ret;
+
+	ret = virtio_find_vqs(vgdev->vdev, 2, vqs, vqs_info, NULL);
+	if (ret)
+		return ret;
+
+	vgdev->ctrlq.vq = vqs[0];
+	vgdev->cursorq.vq = vqs[1];
+
+	return 0;
+}
+
+int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
+{
+	struct virtio_gpu_device *vgdev;
 	u32 num_scanouts, num_capsets;
 	int ret = 0;
 
@@ -206,13 +219,11 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
 	DRM_INFO("features: %ccontext_init\n",
 		 vgdev->has_context_init ? '+' : '-');
 
-	ret = virtio_find_vqs(vgdev->vdev, 2, vqs, vqs_info, NULL);
+	ret = virtio_gpu_find_vqs(vgdev);
 	if (ret) {
 		DRM_ERROR("failed to find virt queues\n");
 		goto err_vqs;
 	}
-	vgdev->ctrlq.vq = vqs[0];
-	vgdev->cursorq.vq = vqs[1];
 	ret = virtio_gpu_alloc_vbufs(vgdev);
 	if (ret) {
 		DRM_ERROR("failed to alloc vbufs\n");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH v2 2/2] drm/virtio: Implement save and restore for virtio_gpu_objects
  2025-05-23 22:00 [RFC PATCH v2 0/2] Virtio-GPU suspend and resume dongwon.kim
  2025-05-23 22:00 ` [RFC PATCH v2 1/2] drm/virtio: Freeze and restore hooks to support " dongwon.kim
@ 2025-05-23 22:00 ` dongwon.kim
  2025-06-14 22:26 ` [RFC PATCH v2 0/2] Virtio-GPU suspend and resume Dmitry Osipenko
  2 siblings, 0 replies; 7+ messages in thread
From: dongwon.kim @ 2025-05-23 22:00 UTC (permalink / raw)
  To: dri-devel, Dmitry Osipenko; +Cc: Vivek Kasireddy

From: Dongwon Kim <dongwon.kim@intel.com>

Host KVM/QEMU loses all graphics resources submitted by the guest OS
upon resumption from sleep or hibernation. This results in invalid
resource errors when the guest OS attempts to interact with the host
regarding those resources.

To address this issue, the virtio-gpu driver now resubmits all existing
resources upon resumption. A linked list has been introduced to maintain
references to all created `virtio_gpu_object` instances and their parameters.

Whenever a new object is created and sent to the host, it is added to this
list. During the `.restore` callback, all backed-up objects are re-sent to
the host using the 'create resource' virtio GPU command, ensuring the
resources are restored seamlessly.

v2: - reset bo->attached if bo->attached was set before so that attach
      backing can be redone upon restore

Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.c    |  6 +++
 drivers/gpu/drm/virtio/virtgpu_drv.h    | 10 ++++
 drivers/gpu/drm/virtio/virtgpu_kms.c    |  1 +
 drivers/gpu/drm/virtio/virtgpu_object.c | 72 +++++++++++++++++++++++++
 4 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 0b17fe18ef4e..1f591369a004 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -206,6 +206,12 @@ static int virtgpu_restore(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
+	error = virtio_gpu_object_restore_all(vgdev);
+	if (error) {
+		DRM_ERROR("Failed to recover objects\n");
+		return error;
+	}
+
 	error = drm_mode_config_helper_resume(dev);
 	if (error) {
 		DRM_ERROR("resume error %d\n", error);
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 1279f998c8e0..55f836378237 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -126,6 +126,12 @@ struct virtio_gpu_object_array {
 	struct drm_gem_object *objs[] __counted_by(total);
 };
 
+struct virtio_gpu_object_restore {
+	struct virtio_gpu_object *bo;
+	struct virtio_gpu_object_params params;
+	struct list_head node;
+};
+
 struct virtio_gpu_vbuffer;
 struct virtio_gpu_device;
 
@@ -265,6 +271,7 @@ struct virtio_gpu_device {
 	struct work_struct obj_free_work;
 	spinlock_t obj_free_lock;
 	struct list_head obj_free_list;
+	struct list_head obj_restore;
 
 	struct virtio_gpu_drv_capset *capsets;
 	uint32_t num_capsets;
@@ -479,6 +486,9 @@ bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo);
 
 int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev,
 			       uint32_t *resid);
+
+int virtio_gpu_object_restore_all(struct virtio_gpu_device *vgdev);
+
 /* virtgpu_prime.c */
 int virtio_gpu_resource_assign_uuid(struct virtio_gpu_device *vgdev,
 				    struct virtio_gpu_object *bo);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 6c1af77ea083..17d182737910 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -162,6 +162,7 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
 	vgdev->fence_drv.context = dma_fence_context_alloc(1);
 	spin_lock_init(&vgdev->fence_drv.lock);
 	INIT_LIST_HEAD(&vgdev->fence_drv.fences);
+	INIT_LIST_HEAD(&vgdev->obj_restore);
 	INIT_LIST_HEAD(&vgdev->cap_cache);
 	INIT_WORK(&vgdev->config_changed_work,
 		  virtio_gpu_config_changed_work_func);
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 5517cff8715c..15c2598187ed 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -61,6 +61,38 @@ static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t
 	}
 }
 
+static void virtio_gpu_object_add_restore_list(struct virtio_gpu_device *vgdev,
+					       struct virtio_gpu_object *bo,
+					       struct virtio_gpu_object_params *params)
+{
+	struct virtio_gpu_object_restore *new;
+
+	new = kvmalloc(sizeof(*new), GFP_KERNEL);
+	if (!new) {
+		DRM_ERROR("Fail to allocate virtio_gpu_object_restore");
+		return;
+	}
+
+	new->bo = bo;
+	memcpy(&new->params, params, sizeof(*params));
+
+	list_add_tail(&new->node, &vgdev->obj_restore);
+}
+
+static void virtio_gpu_object_del_restore_list(struct virtio_gpu_device *vgdev,
+					       struct virtio_gpu_object *bo)
+{
+	struct virtio_gpu_object_restore *curr, *tmp;
+
+	list_for_each_entry_safe(curr, tmp, &vgdev->obj_restore, node) {
+		if (bo == curr->bo) {
+			list_del(&curr->node);
+			kvfree(curr);
+			break;
+		}
+	}
+}
+
 void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo)
 {
 	struct virtio_gpu_device *vgdev = bo->base.base.dev->dev_private;
@@ -84,6 +116,7 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo)
 		drm_gem_object_release(&bo->base.base);
 		kfree(bo);
 	}
+	virtio_gpu_object_del_restore_list(vgdev, bo);
 }
 
 static void virtio_gpu_free_object(struct drm_gem_object *obj)
@@ -257,8 +290,11 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
 					       objs, fence);
 		virtio_gpu_object_attach(vgdev, bo, ents, nents);
 	}
+	/* add submitted object to restore list */
+	virtio_gpu_object_add_restore_list(vgdev, bo, params);
 
 	*bo_ptr = bo;
+
 	return 0;
 
 err_put_objs:
@@ -271,3 +307,39 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
 	drm_gem_shmem_free(shmem_obj);
 	return ret;
 }
+
+int virtio_gpu_object_restore_all(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_object_restore *curr, *tmp;
+	struct virtio_gpu_mem_entry *ents;
+	unsigned int nents;
+	int ret;
+
+	list_for_each_entry_safe(curr, tmp, &vgdev->obj_restore, node) {
+		ret = virtio_gpu_object_shmem_init(vgdev, curr->bo, &ents, &nents);
+		if (ret)
+			break;
+
+		if (curr->params.blob) {
+			virtio_gpu_cmd_resource_create_blob(vgdev, curr->bo, &curr->params,
+							    ents, nents);
+		} else if (curr->params.virgl) {
+			virtio_gpu_cmd_resource_create_3d(vgdev, curr->bo, &curr->params,
+							  NULL, NULL);
+
+			if (curr->bo->attached) {
+				curr->bo->attached = false;
+				virtio_gpu_object_attach(vgdev, curr->bo, ents, nents);
+			}
+		} else {
+			virtio_gpu_cmd_create_resource(vgdev, curr->bo, &curr->params,
+						       NULL, NULL);
+			if (curr->bo->attached) {
+				curr->bo->attached = false;
+				virtio_gpu_object_attach(vgdev, curr->bo, ents, nents);
+			}
+		}
+	}
+
+	return ret;
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
  2025-05-23 22:00 [RFC PATCH v2 0/2] Virtio-GPU suspend and resume dongwon.kim
  2025-05-23 22:00 ` [RFC PATCH v2 1/2] drm/virtio: Freeze and restore hooks to support " dongwon.kim
  2025-05-23 22:00 ` [RFC PATCH v2 2/2] drm/virtio: Implement save and restore for virtio_gpu_objects dongwon.kim
@ 2025-06-14 22:26 ` Dmitry Osipenko
  2025-06-17 22:41   ` Kim, Dongwon
  2 siblings, 1 reply; 7+ messages in thread
From: Dmitry Osipenko @ 2025-06-14 22:26 UTC (permalink / raw)
  To: dongwon.kim, dri-devel; +Cc: Vivek Kasireddy

Hi,

On 5/24/25 01:00, dongwon.kim@intel.com wrote:
> From: Dongwon Kim <dongwon.kim@intel.com>
> 
> This patch series introduces a freeze and restore mechanism for
> the virtio-gpu driver:
> 
> First patch adds `virtgpu_freeze` and `virtgpu_restore` functions.
> These functions handle the deletion of virtio queues before suspension and
> their recreation during the restoration process.
> 
> Second patch implements a mechanism for restoring `virtio_gpu_object` instances.
> This is necessary because the host (QEMU) deletes all associated resources during
> the virtio-gpu reset, which occurs as part of the restoration process.
> 
> These changes ensure that the virtio-gpu driver can properly handle suspend and
> resume scenarios without resource loss.
> 
> Dongwon Kim (2):
>   drm/virtio: Freeze and restore hooks to support suspend and resume
>   drm/virtio: Implement save and restore for virtio_gpu_objects
> 
>  drivers/gpu/drm/virtio/virtgpu_drv.c    | 65 +++++++++++++++++++++-
>  drivers/gpu/drm/virtio/virtgpu_drv.h    | 11 ++++
>  drivers/gpu/drm/virtio/virtgpu_kms.c    | 24 ++++++---
>  drivers/gpu/drm/virtio/virtgpu_object.c | 72 +++++++++++++++++++++++++
>  4 files changed, 165 insertions(+), 7 deletions(-)

Tested the patches, applied to v6.15.2. Suspend-resume works with v2,
display works on resume.

Have you figured out why 10ms workaround is needed?

-- 
Best regards,
Dmitry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
  2025-06-14 22:26 ` [RFC PATCH v2 0/2] Virtio-GPU suspend and resume Dmitry Osipenko
@ 2025-06-17 22:41   ` Kim, Dongwon
  2025-07-01  1:59     ` Dmitry Osipenko
  0 siblings, 1 reply; 7+ messages in thread
From: Kim, Dongwon @ 2025-06-17 22:41 UTC (permalink / raw)
  To: Dmitry Osipenko, dri-devel@lists.freedesktop.org; +Cc: Kasireddy, Vivek

Hi Dmitry,

> Subject: Re: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
> 
> Hi,
> 
> On 5/24/25 01:00, dongwon.kim@intel.com wrote:
> > From: Dongwon Kim <dongwon.kim@intel.com>
> >
> > This patch series introduces a freeze and restore mechanism for the
> > virtio-gpu driver:
> >
> > First patch adds `virtgpu_freeze` and `virtgpu_restore` functions.
> > These functions handle the deletion of virtio queues before suspension
> > and their recreation during the restoration process.
> >
> > Second patch implements a mechanism for restoring `virtio_gpu_object`
> instances.
> > This is necessary because the host (QEMU) deletes all associated
> > resources during the virtio-gpu reset, which occurs as part of the
> restoration process.
> >
> > These changes ensure that the virtio-gpu driver can properly handle
> > suspend and resume scenarios without resource loss.
> >
> > Dongwon Kim (2):
> >   drm/virtio: Freeze and restore hooks to support suspend and resume
> >   drm/virtio: Implement save and restore for virtio_gpu_objects
> >
> >  drivers/gpu/drm/virtio/virtgpu_drv.c    | 65 +++++++++++++++++++++-
> >  drivers/gpu/drm/virtio/virtgpu_drv.h    | 11 ++++
> >  drivers/gpu/drm/virtio/virtgpu_kms.c    | 24 ++++++---
> >  drivers/gpu/drm/virtio/virtgpu_object.c | 72
> > +++++++++++++++++++++++++
> >  4 files changed, 165 insertions(+), 7 deletions(-)
> 
> Tested the patches, applied to v6.15.2. Suspend-resume works with v2,
> display works on resume.
> 
> Have you figured out why 10ms workaround is needed?

[Kim, Dongwon] Unfortunately, I don't know why it fails without the delay. I wanted to narrow it down further,
so I enabled printk during suspend and resume, but the hang didn't occur with the timing changes
caused by the printks.  I've also tried more deterministic methods that make it wait based on some
kind of "status", but none of them have worked so far. If you have any suggestions for a possible
condition we can check instead of just sleeping, please let me know.
10ms seems to be close to the minimum needed to make it work 100% of the time over several days (rtcwake sleep and
wake-up every 5 sec).

> 
> --
> Best regards,
> Dmitry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
  2025-06-17 22:41   ` Kim, Dongwon
@ 2025-07-01  1:59     ` Dmitry Osipenko
  2025-07-01  2:01       ` Kim, Dongwon
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Osipenko @ 2025-07-01  1:59 UTC (permalink / raw)
  To: Kim, Dongwon, dri-devel@lists.freedesktop.org; +Cc: Kasireddy, Vivek

On 6/18/25 01:41, Kim, Dongwon wrote:
...
>> Have you figured out why 10ms workaround is needed?
> 
> [Kim, Dongwon] Unfortunately, I don't know why it fails without the delay. I wanted to narrow down further
> so enabled printk during suspend and resume but hang didn't occur with the timing changes
> caused by printks.  I've also tried more deterministic methods that make it wait based on some
> kinds of "status" but none of them have worked so far. If you have any suggestions on possible
> condition we can check instead of just sleeping, please let me know.
> 10ms seems to be close to minimum to make it work 100% for several days (rtcwake sleep and
> wake up every 5 sec).

Was able to reproduce the hang and got a crash backtrace with no_console_suspend:

[   63.824827] PM: suspend entry (deep)
[   63.825041] Filesystems sync: 0.000 seconds
[   63.990951] Freezing user space processes
[   63.992488] Freezing user space processes completed (elapsed 0.001 seconds)
[   63.992775] OOM killer disabled.
[   63.992902] Freezing remaining freezable tasks
[   63.994099] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
[   64.002183] Oops: general protection fault, probably for non-canonical address 0x2abe0ea26847fb08: 0000 [#1] SMP NOPTI
[   64.003172] CPU: 9 UID: 0 PID: 178 Comm: kworker/9:2 Not tainted 6.15.4-00002-g01117b4373b2-dirty #123 PREEMPT(voluntary) 
[   64.003614] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[   64.004036] Workqueue: events virtio_gpu_dequeue_ctrl_func
[   64.004280] RIP: 0010:virtqueue_get_buf_ctx_split+0x86/0x130
[   64.004515] Code: 01 66 23 43 50 0f b7 c0 8b 74 c1 04 8b 44 c1 08 41 89 45 00 3b 73 58 0f 83 96 d7 20 ff 89 f0 48 c1 e0 04 48 03 83 80 00 00 00 <4c> 8b 20 4d 85 e4 0f 84 5a d7 20 ff 48 89 df e8 46 fc ff ff 0f b7
[   64.005227] RSP: 0018:ffffc90000b53d90 EFLAGS: 00010202
[   64.005430] RAX: 2abe0ea26847fb08 RBX: ffff888102d58a00 RCX: ffff8881255314c0
[   64.005698] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff888102d58a00
[   64.005975] RBP: ffffc90000b53db0 R08: 8080808080808080 R09: ffff88885b470b40
[   64.006273] R10: ffff8881000508c8 R11: fefefefefefefeff R12: 0000000000000001
[   64.006907] R13: ffffc90000b53dfc R14: ffffc90000b53dfc R15: ffff8881032d0568
[   64.007205] FS:  0000000000000000(0000) GS:ffff8888d6650000(0000) knlGS:0000000000000000
[   64.007511] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   64.007732] CR2: 00007efedc4d3000 CR3: 00000001056e9000 CR4: 0000000000750ef0
[   64.008014] PKRU: 55555554
[   64.008123] Call Trace:
[   64.008223]  <TASK>
[   64.008314]  virtqueue_get_buf+0x46/0x60
[   64.008465]  virtio_gpu_dequeue_ctrl_func+0x86/0x2a0
[   64.008655]  process_one_work+0x18a/0x370
[   64.008823]  worker_thread+0x31a/0x460
[   64.008971]  ? _raw_spin_unlock_irqrestore+0x27/0x50
[   64.009176]  ? srso_alias_return_thunk+0x5/0xfbef5
[   64.009369]  ? __pfx_worker_thread+0x10/0x10
[   64.009532]  kthread+0x126/0x230
[   64.009662]  ? _raw_spin_unlock_irq+0x1f/0x40
[   64.009836]  ? __pfx_kthread+0x10/0x10
[   64.009986]  ret_from_fork+0x3a/0x60
[   64.010156]  ? __pfx_kthread+0x10/0x10
[   64.010318]  ret_from_fork_asm+0x1a/0x30
[   64.010507]  </TASK>
[   64.010616] Modules linked in:
[   64.010785] ---[ end trace 0000000000000000 ]--- 

==

The trace shows that the virtio queue is still active after it has been removed. This change fixes the crash; please test:

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 03ab78b44ab3..48bb21f33306 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -187,6 +187,10 @@ static int virtgpu_freeze(struct virtio_device *vdev)
        flush_work(&vgdev->ctrlq.dequeue_work);
        flush_work(&vgdev->cursorq.dequeue_work);
        flush_work(&vgdev->config_changed_work);
+       wait_event(vgdev->ctrlq.ack_queue,
+                  vgdev->ctrlq.vq->num_free == vgdev->ctrlq.vq->num_max);
+       wait_event(vgdev->cursorq.ack_queue,
+                  vgdev->cursorq.vq->num_free == vgdev->cursorq.vq->num_max);
        vdev->config->del_vqs(vdev);
 
        return 0;

-- 
Best regards,
Dmitry

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
  2025-07-01  1:59     ` Dmitry Osipenko
@ 2025-07-01  2:01       ` Kim, Dongwon
  0 siblings, 0 replies; 7+ messages in thread
From: Kim, Dongwon @ 2025-07-01  2:01 UTC (permalink / raw)
  To: Dmitry Osipenko, dri-devel@lists.freedesktop.org; +Cc: Kasireddy, Vivek

Hi Dmitry,

This is great. I will test this and include the fix in the patch.

> -----Original Message-----
> From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Sent: Monday, June 30, 2025 7:00 PM
> To: Kim, Dongwon <dongwon.kim@intel.com>; dri-
> devel@lists.freedesktop.org
> Cc: Kasireddy, Vivek <vivek.kasireddy@intel.com>
> Subject: Re: [RFC PATCH v2 0/2] Virtio-GPU suspend and resume
> 
> On 6/18/25 01:41, Kim, Dongwon wrote:
> ...
> >> Have you figured out why 10ms workaround is needed?
> >
> > [Kim, Dongwon] Unfortunately, I don't know why it fails without the
> > delay. I wanted to narrow down further so enabled printk during
> > suspend and resume but hang didn't occur with the timing changes
> > caused by printks.  I've also tried more deterministic methods that
> > make it wait based on some kinds of "status" but none of them have
> worked so far. If you have any suggestions on possible condition we can
> check instead of just sleeping, please let me know.
> > 10ms seems to be close to minimum to make it work 100% for several
> > days (rtcwake sleep and wake up every 5 sec).
> 
> Was able to reproduce the hang and got a crash backtrace with
> no_console_suspend:
> 
> [   63.824827] PM: suspend entry (deep)
> [   63.825041] Filesystems sync: 0.000 seconds
> [   63.990951] Freezing user space processes
> [   63.992488] Freezing user space processes completed (elapsed 0.001
> seconds)
> [   63.992775] OOM killer disabled.
> [   63.992902] Freezing remaining freezable tasks
> [   63.994099] Freezing remaining freezable tasks completed (elapsed 0.001
> seconds)
> [   64.002183] Oops: general protection fault, probably for non-canonical
> address 0x2abe0ea26847fb08: 0000 [#1] SMP NOPTI
> [   64.003172] CPU: 9 UID: 0 PID: 178 Comm: kworker/9:2 Not tainted 6.15.4-
> 00002-g01117b4373b2-dirty #123 PREEMPT(voluntary)
> [   64.003614] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> [   64.004036] Workqueue: events virtio_gpu_dequeue_ctrl_func
> [   64.004280] RIP: 0010:virtqueue_get_buf_ctx_split+0x86/0x130
> [   64.004515] Code: 01 66 23 43 50 0f b7 c0 8b 74 c1 04 8b 44 c1 08 41 89 45
> 00 3b 73 58 0f 83 96 d7 20 ff 89 f0 48 c1 e0 04 48 03 83 80 00 00 00 <4c> 8b 20
> 4d 85 e4 0f 84 5a d7 20 ff 48 89 df e8 46 fc ff ff 0f b7
> [   64.005227] RSP: 0018:ffffc90000b53d90 EFLAGS: 00010202
> [   64.005430] RAX: 2abe0ea26847fb08 RBX: ffff888102d58a00 RCX:
> ffff8881255314c0
> [   64.005698] RDX: 0000000000000000 RSI: 0000000000000008 RDI:
> ffff888102d58a00
> [   64.005975] RBP: ffffc90000b53db0 R08: 8080808080808080 R09:
> ffff88885b470b40
> [   64.006273] R10: ffff8881000508c8 R11: fefefefefefefeff R12:
> 0000000000000001
> [   64.006907] R13: ffffc90000b53dfc R14: ffffc90000b53dfc R15:
> ffff8881032d0568
> [   64.007205] FS:  0000000000000000(0000) GS:ffff8888d6650000(0000)
> knlGS:0000000000000000
> [   64.007511] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   64.007732] CR2: 00007efedc4d3000 CR3: 00000001056e9000 CR4:
> 0000000000750ef0
> [   64.008014] PKRU: 55555554
> [   64.008123] Call Trace:
> [   64.008223]  <TASK>
> [   64.008314]  virtqueue_get_buf+0x46/0x60
> [   64.008465]  virtio_gpu_dequeue_ctrl_func+0x86/0x2a0
> [   64.008655]  process_one_work+0x18a/0x370
> [   64.008823]  worker_thread+0x31a/0x460
> [   64.008971]  ? _raw_spin_unlock_irqrestore+0x27/0x50
> [   64.009176]  ? srso_alias_return_thunk+0x5/0xfbef5
> [   64.009369]  ? __pfx_worker_thread+0x10/0x10
> [   64.009532]  kthread+0x126/0x230
> [   64.009662]  ? _raw_spin_unlock_irq+0x1f/0x40
> [   64.009836]  ? __pfx_kthread+0x10/0x10
> [   64.009986]  ret_from_fork+0x3a/0x60
> [   64.010156]  ? __pfx_kthread+0x10/0x10
> [   64.010318]  ret_from_fork_asm+0x1a/0x30
> [   64.010507]  </TASK>
> [   64.010616] Modules linked in:
> [   64.010785] ---[ end trace 0000000000000000 ]---
> 
> ==
> 
> The trace tells that virtio queue is active after it has been removed. This
> change fixes the crash, please test:
> 
> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c
> b/drivers/gpu/drm/virtio/virtgpu_drv.c
> index 03ab78b44ab3..48bb21f33306 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_drv.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
> @@ -187,6 +187,10 @@ static int virtgpu_freeze(struct virtio_device *vdev)
>         flush_work(&vgdev->ctrlq.dequeue_work);
>         flush_work(&vgdev->cursorq.dequeue_work);
>         flush_work(&vgdev->config_changed_work);
> +       wait_event(vgdev->ctrlq.ack_queue,
> +                  vgdev->ctrlq.vq->num_free == vgdev->ctrlq.vq->num_max);
> +       wait_event(vgdev->cursorq.ack_queue,
> +                  vgdev->cursorq.vq->num_free ==
> + vgdev->cursorq.vq->num_max);
>         vdev->config->del_vqs(vdev);
> 
>         return 0;
> 
> --
> Best regards,
> Dmitry

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-07-01  2:02 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-23 22:00 [RFC PATCH v2 0/2] Virtio-GPU suspend and resume dongwon.kim
2025-05-23 22:00 ` [RFC PATCH v2 1/2] drm/virtio: Freeze and restore hooks to support " dongwon.kim
2025-05-23 22:00 ` [RFC PATCH v2 2/2] drm/virtio: Implement save and restore for virtio_gpu_objects dongwon.kim
2025-06-14 22:26 ` [RFC PATCH v2 0/2] Virtio-GPU suspend and resume Dmitry Osipenko
2025-06-17 22:41   ` Kim, Dongwon
2025-07-01  1:59     ` Dmitry Osipenko
2025-07-01  2:01       ` Kim, Dongwon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).