From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
To: criu@lists.linux.dev
Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>,
David Francis <David.Francis@amd.com>
Subject: [PATCH v2 18/23] plugins/amdgpu: Move drm file dump and restore into helpers
Date: Fri, 10 Apr 2026 19:55:09 +0100 [thread overview]
Message-ID: <20260410185514.51153-19-tvrtko.ursulin@igalia.com> (raw)
In-Reply-To: <20260410185514.51153-1-tvrtko.ursulin@igalia.com>
Given that the dump and restore of KFD vs DRM fork into completely parallel
flows inside the respective plugin callbacks, let's split them out by
extracting them into helpers. This de-clutters the code flow and makes it
obvious which part is done where.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-By: David Francis <David.Francis@amd.com>
---
plugins/amdgpu/amdgpu_plugin.c | 275 ++++++++++++++------------
plugins/amdgpu/amdgpu_plugin_dmabuf.c | 2 +
2 files changed, 146 insertions(+), 131 deletions(-)
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index f00b60453c6c..2e3f997266c0 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -1397,6 +1397,30 @@ exit:
return ret;
}
+static int amdgpu_plugin_dump_drm_file(int fd, int id, struct stat *st)
+{
+ int ret;
+
+ /* This is RenderD dumper plugin, for now just save renderD
+ * minor number to be used during restore. In later phases this
+ * needs to save more data for video decode etc.
+ */
+ ret = amdgpu_plugin_drm_dump_file(fd, id, st);
+ if (ret)
+ return ret;
+
+ ret = record_dumped_fd(fd, true);
+ if (ret)
+ return ret;
+
+ ret = try_dump_dmabuf_list();
+
+ if (!ret)
+ ret = amdgpu_add_to_inventory();
+
+ return ret;
+}
+
int amdgpu_plugin_dump_file(int fd, int id)
{
struct kfd_ioctl_criu_args args = { 0 };
@@ -1435,35 +1459,14 @@ int amdgpu_plugin_dump_file(int fd, int id)
pr_perror("Failed to get dmabuf info");
return -1;
}
- if (ret == 0) {
- pr_info("Dumping dmabuf fd = %d\n", fd);
- return amdgpu_plugin_dmabuf_dump(fd, id);
- }
-
- if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) {
-
- /* This is RenderD dumper plugin, for now just save renderD
- * minor number to be used during restore. In later phases this
- * needs to save more data for video decode etc.
- */
- ret = amdgpu_plugin_drm_dump_file(fd, id, &st);
- if (ret)
- return ret;
-
- ret = record_dumped_fd(fd, true);
- if (ret)
- return ret;
-
- ret = try_dump_dmabuf_list();
-
- if (!ret)
- ret = amdgpu_add_to_inventory();
-
- return ret;
- }
pr_info("%s() called for fd = %d\n", __func__, major(st.st_rdev));
+ if (ret == 0)
+ return amdgpu_plugin_dmabuf_dump(fd, id);
+ else if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0)
+ return amdgpu_plugin_dump_drm_file(fd, id, &st);
+
/* KFD only allows ioctl calls from the same process that opened the KFD file descriptor.
* The existing /dev/kfd file descriptor that is passed in is only allowed to do IOCTL calls with
* CAP_CHECKPOINT_RESTORE/CAP_SYS_ADMIN. So kernel_supports_criu() needs to open its own file descriptor to
@@ -1835,12 +1838,124 @@ exit:
return ret;
}
+static int amdgpu_plugin_restore_drm_file(int id, bool *retry_needed)
+{
+ char img_path[PATH_MAX];
+ struct tp_node *tp_node;
+ uint32_t target_gpu_id;
+ CriuRenderNode *rd;
+ unsigned char *buf;
+ size_t img_size;
+ FILE *img_fp;
+ int fd, ret;
+
+ /* This is restorer plugin for renderD nodes. Criu doesn't guarantee that they will
+ * be called before the plugin is called for kfd file descriptor.
+ * TODO: Currently, this code will only work if this function is called for /dev/kfd
+ * first as we assume restore_maps is already filled. Need to fix this later.
+ */
+ snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id);
+
+ img_fp = open_img_file(img_path, false, &img_size, true);
+ if (!img_fp) {
+ ret = amdgpu_plugin_dmabuf_restore(id);
+ if (ret == 1) {
+ /* This is a dmabuf fd, but the corresponding buffer object that was
+ * exported to make it has not yet been restored. Need to try again
+ * later when the buffer object exists, so it can be re-exported.
+ */
+ *retry_needed = true;
+ return 0;
+ }
+ return ret;
+ }
+ pr_info("Restoring RenderD %s\n", img_path);
+ pr_debug("RenderD Image file size:%ld\n", img_size);
+ buf = xmalloc(img_size);
+ if (!buf) {
+ pr_perror("Failed to allocate memory");
+ return -ENOMEM;
+ }
+
+ ret = read_fp(img_fp, buf, img_size);
+ if (ret) {
+ pr_perror("Unable to read from %s", img_path);
+ xfree(buf);
+ return -1;
+ }
+
+ rd = criu_render_node__unpack(NULL, img_size, buf);
+ if (rd == NULL) {
+ pr_perror("Unable to parse the RenderD message %d", id);
+ xfree(buf);
+ fclose(img_fp);
+ return -1;
+ }
+ fclose(img_fp);
+
+ pr_info("render node gpu_id = 0x%04x\n", rd->gpu_id);
+
+ target_gpu_id = maps_get_dest_gpu(&restore_maps, rd->gpu_id);
+ if (!target_gpu_id) {
+ fd = -ENODEV;
+ goto fail;
+ }
+
+ tp_node = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
+ if (!tp_node) {
+ fd = -ENODEV;
+ goto fail;
+ }
+
+ pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id);
+
+ fd = node_get_drm_render_device(tp_node);
+ if (fd < 0) {
+ pr_err("Failed to open render device (minor:%d) - %s\n",
+ tp_node->drm_render_minor, strerror(-fd));
+ goto fail;
+ }
+
+ ret = amdgpu_plugin_drm_restore_file(fd, rd);
+ if (ret == 1)
+ *retry_needed = true;
+ if (ret < 0) {
+ fd = ret;
+ goto fail;
+ }
+fail:
+ criu_render_node__free_unpacked(rd, NULL);
+ xfree(buf);
+ /*
+ * We need to use the file descriptor used to create the BOs for mmap later, otherwise the kernel DRM
+ * drivers will not allow the mmap. Therefore, we keep a copy of the file descriptor (stored in tp_node)
+ * so that we can return it in amdgpu_plugin_update_vmamap later. Also, CRIU core will dup and close the
+ * returned fd after this function returns, and this will make our fd invalid. So we return a dup'ed
+ * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in
+ * tp_node.
+ */
+
+ if (fd < 0)
+ return fd;
+
+ if (!(*retry_needed)) {
+ fd = dup(fd);
+ if (fd == -1) {
+ pr_perror("unable to duplicate the render fd");
+ return -1;
+ }
+ return fd;
+ }
+
+ return 0;
+
+}
+
int amdgpu_plugin_restore_file(int id, bool *retry_needed)
{
int ret = 0, fd;
char img_path[PATH_MAX];
unsigned char *buf;
- CriuRenderNode *rd;
CriuKfd *e = NULL;
struct kfd_ioctl_criu_args args = { 0 };
size_t img_size;
@@ -1856,110 +1971,8 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed)
snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id);
img_fp = open_img_file(img_path, false, &img_size, false);
- if (!img_fp) {
- struct tp_node *tp_node;
- uint32_t target_gpu_id;
-
- /* This is restorer plugin for renderD nodes. Criu doesn't guarantee that they will
- * be called before the plugin is called for kfd file descriptor.
- * TODO: Currently, this code will only work if this function is called for /dev/kfd
- * first as we assume restore_maps is already filled. Need to fix this later.
- */
- snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id);
-
- img_fp = open_img_file(img_path, false, &img_size, true);
- if (!img_fp) {
- ret = amdgpu_plugin_dmabuf_restore(id);
- if (ret == 1) {
- /* This is a dmabuf fd, but the corresponding buffer object that was
- * exported to make it has not yet been restored. Need to try again
- * later when the buffer object exists, so it can be re-exported.
- */
- *retry_needed = true;
- return 0;
- }
- return ret;
- }
- pr_info("Restoring RenderD %s\n", img_path);
- pr_debug("RenderD Image file size:%ld\n", img_size);
- buf = xmalloc(img_size);
- if (!buf) {
- pr_perror("Failed to allocate memory");
- return -ENOMEM;
- }
-
- ret = read_fp(img_fp, buf, img_size);
- if (ret) {
- pr_perror("Unable to read from %s", img_path);
- xfree(buf);
- return -1;
- }
-
- rd = criu_render_node__unpack(NULL, img_size, buf);
- if (rd == NULL) {
- pr_perror("Unable to parse the RenderD message %d", id);
- xfree(buf);
- fclose(img_fp);
- return -1;
- }
- fclose(img_fp);
-
- pr_info("render node gpu_id = 0x%04x\n", rd->gpu_id);
-
- target_gpu_id = maps_get_dest_gpu(&restore_maps, rd->gpu_id);
- if (!target_gpu_id) {
- fd = -ENODEV;
- goto fail;
- }
-
- tp_node = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
- if (!tp_node) {
- fd = -ENODEV;
- goto fail;
- }
-
- pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id);
-
- fd = node_get_drm_render_device(tp_node);
- if (fd < 0) {
- pr_err("Failed to open render device (minor:%d) - %s\n",
- tp_node->drm_render_minor, strerror(-fd));
- return -1;
- }
-
- ret = amdgpu_plugin_drm_restore_file(fd, rd);
- if (ret == 1)
- *retry_needed = true;
- if (ret < 0) {
- fd = ret;
- goto fail;
- }
- fail:
- criu_render_node__free_unpacked(rd, NULL);
- xfree(buf);
- /*
- * We need to use the file descriptor used to create the BOs for mmap later, otherwise the kernel DRM
- * drivers will not allow the mmap. Therefore, we keep a copy of the file descriptor (stored in tp_node)
- * so that we can return it in amdgpu_plugin_update_vmamap later. Also, CRIU core will dup and close the
- * returned fd after this function returns, and this will make our fd invalid. So we return a dup'ed
- * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in
- * tp_node.
- */
-
- if (fd < 0)
- return fd;
-
- if (!(*retry_needed)) {
- fd = dup(fd);
- if (fd == -1) {
- pr_perror("unable to duplicate the render fd");
- return -1;
- }
- return fd;
- }
-
- return 0;
- }
+ if (!img_fp)
+ return amdgpu_plugin_restore_drm_file(id, retry_needed);
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
if (fd < 0) {
diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.c b/plugins/amdgpu/amdgpu_plugin_dmabuf.c
index 312c9f95d048..00181d46ca5c 100644
--- a/plugins/amdgpu/amdgpu_plugin_dmabuf.c
+++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.c
@@ -164,6 +164,8 @@ int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
{
int ret;
+ pr_info("Dumping dmabuf fd %d\n", dmabuf_fd);
+
ret = __amdgpu_plugin_dmabuf_dump(dmabuf_fd, id);
if (ret == -EAGAIN) {
struct dmabuf *b = xmalloc(sizeof(*b));
--
2.52.0
next prev parent reply other threads:[~2026-04-10 18:55 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-10 18:54 [PATCH v2 00/23] Amdgpu plugin cleanups and fixes Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 01/23] plugins/amgdpu: Fix one error message Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 02/23] plugins/amdgpu: Remove unused current_pid global variable Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 03/23] plugins/amdgpu: Remove unused new_minor from struct vma_metadata Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 04/23] plugins/amdgpu: Fix drm pages size header Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 05/23] plugins/amdgpu: Fix logging of failures to open files during restore init Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 06/23] plugins/amdgpu: Propagate failure to save buffer object content Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 07/23] plugins/amdgpu: Close the directory when image probing fails Tvrtko Ursulin
2026-04-10 18:54 ` [PATCH v2 08/23] plugins/amdgpu: Close dma-buf image file if the read fails Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 09/23] plugins/amdgpu: Flatten amdgpu_restore_init a bit Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 10/23] plugins/amdgpu: Add error handling for seek operations Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 11/23] plugins/amdgpu: Consolidate vm_info collection Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 12/23] plugins/amdgpu: Remove plugin_log_msg() Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 13/23] plugins/amdgpu: Reduce amount of debug logging a little bit Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 14/23] plugins/amdgpu: Do not eat the errno in kmtIoctl Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 15/23] plugins/amdgpu: Fix open_drm_render_device() Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 16/23] plugins/amdgpu: Check sdma operation type early and once Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 17/23] plugins/amdgpu: Add plugin to inventory even if process has no vmas Tvrtko Ursulin
2026-04-10 18:55 ` Tvrtko Ursulin [this message]
2026-04-10 18:55 ` [PATCH v2 19/23] plugins/amdgpu: Use the load_img helper in drm file restore Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 20/23] plugins/amdgpu: Convert away from libc buffered file IO Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 21/23] plugins/amdgpu: Use save_vma_updates for all call sites Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 22/23] plugins/amdgpu: amdgpu_plugin_drm_restore_file() does not need to use libdrm Tvrtko Ursulin
2026-04-10 18:55 ` [PATCH v2 23/23] plugins/amdgpu: Fix remaining wrong usages of pr_perror Tvrtko Ursulin
2026-04-13 18:23 ` [PATCH v2 00/23] Amdgpu plugin cleanups and fixes Andrei Vagin
2026-04-13 19:47 ` Tvrtko Ursulin
2026-04-13 20:03 ` Andrei Vagin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260410185514.51153-19-tvrtko.ursulin@igalia.com \
--to=tvrtko.ursulin@igalia.com \
--cc=David.Francis@amd.com \
--cc=criu@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox