* [PATCH v7] Add CRIU support for amdgpu dmabuf
@ 2025-06-17 19:45 David Francis
2025-06-17 19:45 ` [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle David Francis
` (3 more replies)
0 siblings, 4 replies; 11+ messages in thread
From: David Francis @ 2025-06-17 19:45 UTC (permalink / raw)
To: dri-devel
Cc: tvrtko.ursulin, Felix.Kuehling, David.YatSin, Chris.Freehill,
Christian.Koenig, dcostantino, sruffell, simona, mripard,
tzimmermann
This patch series adds support for CRIU checkpointing of processes that
share memory with the amdgpu dmabuf interface.
This v7 splits the bo and mapping info patches and uses the valid and
invalid lists instead of the rb tree.
A single macro that iterates over both lists is in the works.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle
2025-06-17 19:45 [PATCH v7] Add CRIU support for amdgpu dmabuf David Francis
@ 2025-06-17 19:45 ` David Francis
2025-06-17 20:29 ` Felix Kuehling
2025-06-17 19:45 ` [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info David Francis
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: David Francis @ 2025-06-17 19:45 UTC (permalink / raw)
To: dri-devel
Cc: tvrtko.ursulin, Felix.Kuehling, David.YatSin, Chris.Freehill,
Christian.Koenig, dcostantino, sruffell, simona, mripard,
tzimmermann, David Francis
CRIU restore of drm buffer objects requires the ability to create
or import a buffer object with a specific gem handle.
Add new drm ioctl DRM_IOCTL_GEM_CHANGE_HANDLE, which takes
the gem handle of an object and moves that object to a
specified new gem handle.
This ioctl needs to call drm_prime_remove_buf_handle,
but that function acquires the prime lock, which the ioctl
needs to hold for other purposes.
Make drm_prime_remove_buf_handle not acquire the prime lock,
and change its other caller to reflect this.
Signed-off-by: David Francis <David.Francis@amd.com>
---
drivers/gpu/drm/drm_gem.c | 58 ++++++++++++++++++++++++++++++++++
drivers/gpu/drm/drm_internal.h | 4 +++
drivers/gpu/drm/drm_ioctl.c | 1 +
drivers/gpu/drm/drm_prime.c | 6 +---
include/uapi/drm/drm.h | 17 ++++++++++
5 files changed, 81 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index c6240bab3fa5..631cbf9e1e2b 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -282,7 +282,12 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
+ mutex_lock(&file_priv->prime.lock);
+
drm_prime_remove_buf_handle(&file_priv->prime, id);
+
+ mutex_unlock(&file_priv->prime.lock);
+
drm_vma_node_revoke(&obj->vma_node, file_priv);
drm_gem_object_handle_put_unlocked(obj);
@@ -888,6 +893,59 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data,
return ret;
}
+/**
+ * drm_gem_open_ioctl - implementation of the GEM_CHANGE_HANDLE ioctl
+ * @dev: drm_device
+ * @data: ioctl data
+ * @file_priv: drm file-private structure
+ *
+ * Change the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_gem_change_handle *args = data;
+ struct drm_gem_object *obj;
+ int ret;
+
+ obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ if (args->handle == args->new_handle)
+ return 0;
+
+ mutex_lock(&file_priv->prime.lock);
+ spin_lock(&file_priv->table_lock);
+
+ ret = idr_alloc(&file_priv->object_idr, obj, args->new_handle, args->new_handle + 1, GFP_NOWAIT);
+ if (ret < 0)
+ goto out_unlock_table;
+
+ spin_unlock(&file_priv->table_lock);
+
+ if (obj->dma_buf) {
+ ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, args->new_handle);
+ if (ret < 0)
+ goto out_unlock_prime;
+
+ drm_prime_remove_buf_handle(&file_priv->prime, args->handle);
+ }
+
+ spin_lock(&file_priv->table_lock);
+ idr_remove(&file_priv->object_idr, args->handle);
+
+out_unlock_table:
+ spin_unlock(&file_priv->table_lock);
+out_unlock_prime:
+ mutex_unlock(&file_priv->prime.lock);
+
+ return ret;
+}
+
/**
* drm_gem_open_ioctl - implementation of the GEM_OPEN ioctl
* @dev: drm_device
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index b2b6a8e49dda..e9d5cdf7e033 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -85,6 +85,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
+int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
+ struct dma_buf *dma_buf, uint32_t handle);
void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
uint32_t handle);
@@ -168,6 +170,8 @@ int drm_gem_close_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_gem_flink_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
int drm_gem_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void drm_gem_open(struct drm_device *dev, struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f593dc569d31..d8a24875a7ba 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -653,6 +653,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_IOCTL_GEM_CHANGE_HANDLE, drm_gem_change_handle_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index bdb51c8f262e..1f2e858e5000 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -93,7 +93,7 @@ struct drm_prime_member {
struct rb_node handle_rb;
};
-static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
+int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
struct dma_buf *dma_buf, uint32_t handle)
{
struct drm_prime_member *member;
@@ -190,8 +190,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
{
struct rb_node *rb;
- mutex_lock(&prime_fpriv->lock);
-
rb = prime_fpriv->handles.rb_node;
while (rb) {
struct drm_prime_member *member;
@@ -210,8 +208,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
rb = rb->rb_left;
}
}
-
- mutex_unlock(&prime_fpriv->lock);
}
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 7fba37b94401..84c819c171d2 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -625,6 +625,15 @@ struct drm_gem_open {
__u64 size;
};
+/* DRM_IOCTL_GEM_CHANGE_HANDLE ioctl argument type */
+struct drm_gem_change_handle {
+ /** Current handle of object */
+ __u32 handle;
+
+ /** Handle to change that object to */
+ __u32 new_handle;
+};
+
/**
* DRM_CAP_DUMB_BUFFER
*
@@ -1305,6 +1314,14 @@ extern "C" {
*/
#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
2025-06-17 19:45 [PATCH v7] Add CRIU support for amdgpu dmabuf David Francis
2025-06-17 19:45 ` [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle David Francis
@ 2025-06-17 19:45 ` David Francis
2025-06-18 21:05 ` Felix Kuehling
2025-06-17 19:45 ` [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl David Francis
2025-06-17 19:45 ` [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects David Francis
3 siblings, 1 reply; 11+ messages in thread
From: David Francis @ 2025-06-17 19:45 UTC (permalink / raw)
To: dri-devel
Cc: tvrtko.ursulin, Felix.Kuehling, David.YatSin, Chris.Freehill,
Christian.Koenig, dcostantino, sruffell, simona, mripard,
tzimmermann, David Francis
Add new ioctl DRM_IOCTL_AMDGPU_CRIU_BO_INFO.
This ioctl returns a list of bos with their handles, sizes,
and flags and domains.
This ioctl is meant to be used during CRIU checkpoint and
provide information needed to reconstruct the bos
in CRIU restore.
Signed-off-by: David Francis <David.Francis@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 144 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 32 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +
include/uapi/drm/amdgpu_drm.h | 28 +++++
6 files changed, 209 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 87080c06e5fc..0863edcdd03f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -63,7 +63,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o \
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_criu.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
new file mode 100644
index 000000000000..34a0358946b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: MIT */
+/*
+* Copyright 2025 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <linux/dma-buf.h>
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
+#include "amdgpu_criu.h"
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/interval_tree_generic.h>
+
+#include "amdgpu.h"
+#include "amdgpu_display.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_xgmi.h"
+
+static uint32_t hardware_flags_to_uapi_flags(struct amdgpu_device *adev, uint64_t pte_flags)
+{
+ uint32_t gem_flags = 0;
+
+ //This function will be replaced by the mapping flags rework
+
+ if (pte_flags & AMDGPU_PTE_EXECUTABLE)
+ gem_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+ if (pte_flags & AMDGPU_PTE_READABLE)
+ gem_flags |= AMDGPU_VM_PAGE_READABLE;
+ if (pte_flags & AMDGPU_PTE_WRITEABLE)
+ gem_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (pte_flags & AMDGPU_PTE_PRT_FLAG(adev))
+ gem_flags |= AMDGPU_VM_PAGE_PRT;
+ if (pte_flags & AMDGPU_PTE_NOALLOC)
+ gem_flags |= AMDGPU_VM_PAGE_NOALLOC;
+
+ return gem_flags;
+}
+
+
+/**
+ * amdgpu_criu_bo_info_ioctl - get information about a process' buffer objects
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_criu_bo_info_args
+ * @filp: drm file pointer
+ *
+ * num_bos is set as an input to the size of the bo_buckets array.
+ * num_bos is sent back as output as the number of bos in the process.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_criu_bo_info_args *args = data;
+ struct drm_amdgpu_criu_bo_bucket *bo_buckets;
+ struct drm_gem_object *gobj;
+ int id, ret = 0;
+ int bo_index = 0;
+ int num_bos = 0;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id)
+ num_bos += 1;
+ spin_unlock(&filp->table_lock);
+
+ if (args->num_bos < num_bos) {
+ args->num_bos = num_bos;
+ goto exit;
+ }
+ args->num_bos = num_bos;
+ if (num_bos == 0) {
+ goto exit;
+ }
+
+ bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets) {
+ ret = -ENOMEM;
+ goto free_buckets;
+ }
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct drm_amdgpu_criu_bo_bucket *bo_bucket;
+
+ bo_bucket = &bo_buckets[bo_index];
+
+ bo_bucket->size = amdgpu_bo_size(bo);
+ bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
+ bo_bucket->preferred_domains = bo->preferred_domains;
+ bo_bucket->gem_handle = id;
+
+ if (bo->tbo.base.import_attach)
+ bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_IS_IMPORT;
+
+ bo_index += 1;
+ }
+ spin_unlock(&filp->table_lock);
+
+ ret = copy_to_user((void __user *)args->bo_buckets, bo_buckets, num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_debug("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ }
+
+free_buckets:
+ kvfree(bo_buckets);
+exit:
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
new file mode 100644
index 000000000000..1b18ffee6587
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+* Copyright 2025 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __AMDGPU_CRIU_H__
+#define __AMDGPU_CRIU_H__
+
+#include <drm/amdgpu_drm.h>
+
+int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4db92e0a60da..bf33567bb166 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -53,6 +53,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_userq.h"
#include "amdgpu_userq_fence.h"
+#include "amdgpu_criu.h"
#include "../amdxcp/amdgpu_xcp_drv.h"
/*
@@ -3021,6 +3022,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_BO_INFO, amdgpu_criu_bo_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a2149afa5803..a8cf2d4580cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -45,6 +45,8 @@
#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"
+#include "amdgpu_criu.h"
+
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_release(struct inode *, struct file *);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 45c4fa13499c..1508c55ff92a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -57,6 +57,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ 0x16
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
+#define DRM_AMDGPU_CRIU_BO_INFO 0x19
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -77,6 +78,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
+#define DRM_IOCTL_AMDGPU_CRIU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_BO_INFO, struct drm_amdgpu_criu_bo_info_args)
/**
* DOC: memory domains
@@ -1626,6 +1628,32 @@ struct drm_color_ctm_3x4 {
__u64 matrix[12];
};
+#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0)
+
+struct drm_amdgpu_criu_bo_info_args {
+ /* IN: Size of bo_buckets buffer. OUT: Number of bos in process (if larger than size of buffer, must retry) */
+ __u32 num_bos;
+
+ /* User pointer to array of drm_amdgpu_criu_bo_bucket */
+ __u64 bo_buckets;
+};
+
+struct drm_amdgpu_criu_bo_bucket {
+ /* Size of bo */
+ __u64 size;
+
+ /* GEM_CREATE flags for re-creation of buffer */
+ __u64 alloc_flags;
+
+ /* Pending how to handle this; provides information needed to remake the buffer on restore */
+ __u32 preferred_domains;
+
+ /* Currently just one flag: IS_IMPORT */
+ __u32 flags;
+
+ __u32 gem_handle;
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl
2025-06-17 19:45 [PATCH v7] Add CRIU support for amdgpu dmabuf David Francis
2025-06-17 19:45 ` [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle David Francis
2025-06-17 19:45 ` [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info David Francis
@ 2025-06-17 19:45 ` David Francis
2025-06-18 12:42 ` kernel test robot
2025-06-17 19:45 ` [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects David Francis
3 siblings, 1 reply; 11+ messages in thread
From: David Francis @ 2025-06-17 19:45 UTC (permalink / raw)
To: dri-devel
Cc: tvrtko.ursulin, Felix.Kuehling, David.YatSin, Chris.Freehill,
Christian.Koenig, dcostantino, sruffell, simona, mripard,
tzimmermann, David Francis
Add new ioctl DRM_IOCTL_AMDGPU_CRIU_MAPPING_INFO, which
returns a list of mappings associated with a given bo, along with
their positions and offsets.
Signed-off-by: David Francis <David.Francis@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 96 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ++
include/uapi/drm/amdgpu_drm.h | 27 +++++++
5 files changed, 131 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
index 34a0358946b6..dd677367a82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
@@ -142,3 +142,99 @@ int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
return ret;
}
+
+/**
+ * amdgpu_criu_mapping_info_ioctl - get information about a buffer's mappings
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_criu_mapping_info_args
+ * @filp: drm file pointer
+ *
+ * num_mappings is set as an input to the size of the vm_buckets array.
+ * num_mappings is sent back as output as the number of mappings the bo has.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_criu_mapping_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_criu_mapping_info_args *args = data;
+ struct drm_gem_object *gobj = idr_find(&filp->object_idr, args->gem_handle);
+ struct amdgpu_vm *avm = &((struct amdgpu_fpriv *)filp->driver_priv)->vm;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(avm, bo);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct drm_amdgpu_criu_vm_bucket *vm_buckets;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct drm_exec exec;
+ int num_mappings = 0;
+ int ret;
+
+ vm_buckets = kvzalloc(args->num_mappings * sizeof(*vm_buckets), GFP_KERNEL);
+ if (!vm_buckets) {
+ ret = -ENOMEM;
+ goto free_vms;
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ if (gobj) {
+ ret = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ goto unlock_exec;
+ }
+
+ ret = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ goto unlock_exec;
+ }
+
+ amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_mappings) {
+ vm_buckets[num_mappings].start = mapping->start;
+ vm_buckets[num_mappings].last = mapping->last;
+ vm_buckets[num_mappings].offset = mapping->offset;
+ vm_buckets[num_mappings].flags = hardware_flags_to_uapi_flags(drm_to_adev(dev), mapping->flags);
+ }
+ num_mappings += 1;
+ }
+
+ amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
+ if (num_mappings < args->num_mappings) {
+ vm_buckets[num_mappings].start = mapping->start;
+ vm_buckets[num_mappings].last = mapping->last;
+ vm_buckets[num_mappings].offset = mapping->offset;
+ vm_buckets[num_mappings].flags = hardware_flags_to_uapi_flags(drm_to_adev(dev), mapping->flags);
+ }
+ num_mappings += 1;
+ }
+
+ drm_exec_fini(&exec);
+
+ if (num_mappings > 0) {
+ if (num_mappings <= args->num_mappings) {
+ ret = copy_to_user((void __user *)args->vm_buckets, vm_buckets, num_mappings * sizeof(*vm_buckets));
+ if (ret) {
+ pr_debug("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ }
+ }
+ }
+ args->num_mappings = num_mappings;
+
+ kvfree(vm_buckets);
+
+ return ret;
+unlock_exec:
+ drm_exec_fini(&exec);
+free_vms:
+ kvfree(vm_buckets);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
index 1b18ffee6587..486c341729ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
@@ -28,5 +28,7 @@
int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+int amdgpu_criu_mapping_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bf33567bb166..5f3de93a665d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3023,6 +3023,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_BO_INFO, amdgpu_criu_bo_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_MAPPING_INFO, amdgpu_criu_mapping_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index f3ad687125ad..978d6b29e626 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -668,4 +668,9 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
+#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &bo_va->valids, list)
+#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &bo_va->invalids, list)
+
#endif
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1508c55ff92a..ab09ae9890e7 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_CRIU_BO_INFO 0x19
+#define DRM_AMDGPU_CRIU_MAPPING_INFO 0x20
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_CRIU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_BO_INFO, struct drm_amdgpu_criu_bo_info_args)
+#define DRM_IOCTL_AMDGPU_CRIU_MAPPING_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_MAPPING_INFO, struct drm_amdgpu_criu_mapping_info_args)
/**
* DOC: memory domains
@@ -1654,6 +1656,31 @@ struct drm_amdgpu_criu_bo_bucket {
__u32 gem_handle;
};
+struct drm_amdgpu_criu_mapping_info_args {
+ /* Handle of bo to get mappings of */
+ __u32 gem_handle;
+
+ /* IN: Size of vm_buckets buffer. OUT: Number of bos in process (if larger than size of buffer, must retry) */
+ __u32 num_mappings;
+
+ /* User pointer to array of drm_amdgpu_criu_vm_bucket */
+ __u64 vm_buckets;
+};
+
+struct drm_amdgpu_criu_vm_bucket {
+ /* Start of mapping (in number of pages) */
+ __u64 start;
+
+ /* End of mapping (in number of pages) */
+ __u64 last;
+
+ /* Mapping offset */
+ __u64 offset;
+
+ /* flags needed to recreate mapping; still pending how to get these */
+ __u64 flags;
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects
2025-06-17 19:45 [PATCH v7] Add CRIU support for amdgpu dmabuf David Francis
` (2 preceding siblings ...)
2025-06-17 19:45 ` [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl David Francis
@ 2025-06-17 19:45 ` David Francis
2025-06-17 21:11 ` Felix Kuehling
3 siblings, 1 reply; 11+ messages in thread
From: David Francis @ 2025-06-17 19:45 UTC (permalink / raw)
To: dri-devel
Cc: tvrtko.ursulin, Felix.Kuehling, David.YatSin, Chris.Freehill,
Christian.Koenig, dcostantino, sruffell, simona, mripard,
tzimmermann, David Francis
The kfd CRIU checkpoint ioctl would return an error if trying
to checkpoint a process with no kfd buffer objects.
This is a normal case and should not be an error.
Signed-off-by: David Francis <David.Francis@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a8cf2d4580cc..9617c696d5b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2569,8 +2569,8 @@ static int criu_restore(struct file *filep,
pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
- if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
- !args->num_devices || !args->num_bos)
+ if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+ !args->priv_data_size || !args->num_devices)
return -EINVAL;
mutex_lock(&p->mutex);
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle
2025-06-17 19:45 ` [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle David Francis
@ 2025-06-17 20:29 ` Felix Kuehling
0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2025-06-17 20:29 UTC (permalink / raw)
To: David Francis, dri-devel
Cc: tvrtko.ursulin, David.YatSin, Chris.Freehill, Christian.Koenig,
dcostantino, sruffell, simona, mripard, tzimmermann
On 2025-06-17 15:45, David Francis wrote:
> CRIU restore of drm buffer objects requires the ability to create
> or import a buffer object with a specific gem handle.
>
> Add new drm ioctl DRM_IOCTL_GEM_CHANGE_HANDLE, which takes
> the gem handle of an object and moves that object to a
> specified new gem handle.
>
> This ioctl needs to call drm_prime_remove_buf_handle,
> but that function acquires the prime lock, which the ioctl
> needs to hold for other purposes.
>
> Make drm_prime_remove_buf_handle not acquire the prime lock,
> and change its other caller to reflect this.
>
> Signed-off-by: David Francis <David.Francis@amd.com>
> ---
> drivers/gpu/drm/drm_gem.c | 58 ++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/drm_internal.h | 4 +++
> drivers/gpu/drm/drm_ioctl.c | 1 +
> drivers/gpu/drm/drm_prime.c | 6 +---
> include/uapi/drm/drm.h | 17 ++++++++++
> 5 files changed, 81 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index c6240bab3fa5..631cbf9e1e2b 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -282,7 +282,12 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
> if (obj->funcs->close)
> obj->funcs->close(obj, file_priv);
>
> + mutex_lock(&file_priv->prime.lock);
> +
> drm_prime_remove_buf_handle(&file_priv->prime, id);
> +
> + mutex_unlock(&file_priv->prime.lock);
> +
> drm_vma_node_revoke(&obj->vma_node, file_priv);
>
> drm_gem_object_handle_put_unlocked(obj);
> @@ -888,6 +893,59 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data,
> return ret;
> }
>
> +/**
> + * drm_gem_open_ioctl - implementation of the GEM_CHANGE_HANDLE ioctl
Update the function name.
> + * @dev: drm_device
> + * @data: ioctl data
> + * @file_priv: drm file-private structure
> + *
> + * Change the handle of a GEM object to the specified one.
> + * The new handle must be unused. On success the old handle is closed
> + * and all further IOCTL should refer to the new handle only.
> + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
> + */
> +int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct drm_gem_change_handle *args = data;
> + struct drm_gem_object *obj;
> + int ret;
> +
> + obj = drm_gem_object_lookup(file_priv, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + if (args->handle == args->new_handle)
> + return 0;
> +
> + mutex_lock(&file_priv->prime.lock);
> + spin_lock(&file_priv->table_lock);
> +
> + ret = idr_alloc(&file_priv->object_idr, obj, args->new_handle, args->new_handle + 1, GFP_NOWAIT);
> + if (ret < 0)
> + goto out_unlock_table;
You can move the return-value-check to after the spin-unlock to simplify
the error handling.
> +
> + spin_unlock(&file_priv->table_lock);
> +
> + if (obj->dma_buf) {
> + ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, args->new_handle);
> + if (ret < 0)
> + goto out_unlock_prime;
Don't you need to idr_remove the new handle here to avoid leaking it?
Maybe just let it fall through and make the idr_remove below conditional
to remove the old or new handle depending on the value of ret.
Regards,
Felix
> +
> + drm_prime_remove_buf_handle(&file_priv->prime, args->handle);
> + }
> +
> + spin_lock(&file_priv->table_lock);
> + idr_remove(&file_priv->object_idr, args->handle);
> +
> +out_unlock_table:
> + spin_unlock(&file_priv->table_lock);
> +out_unlock_prime:
> + mutex_unlock(&file_priv->prime.lock);
> +
> + return ret;
> +}
> +
> /**
> * drm_gem_open_ioctl - implementation of the GEM_OPEN ioctl
> * @dev: drm_device
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index b2b6a8e49dda..e9d5cdf7e033 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -85,6 +85,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>
> void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
> void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
> +int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
> + struct dma_buf *dma_buf, uint32_t handle);
> void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
> uint32_t handle);
>
> @@ -168,6 +170,8 @@ int drm_gem_close_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int drm_gem_flink_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
> int drm_gem_open_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> void drm_gem_open(struct drm_device *dev, struct drm_file *file_private);
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index f593dc569d31..d8a24875a7ba 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -653,6 +653,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
> DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
> + DRM_IOCTL_DEF(DRM_IOCTL_GEM_CHANGE_HANDLE, drm_gem_change_handle_ioctl, DRM_RENDER_ALLOW),
>
> DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0),
>
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index bdb51c8f262e..1f2e858e5000 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -93,7 +93,7 @@ struct drm_prime_member {
> struct rb_node handle_rb;
> };
>
> -static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
> +int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
> struct dma_buf *dma_buf, uint32_t handle)
> {
> struct drm_prime_member *member;
> @@ -190,8 +190,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
> {
> struct rb_node *rb;
>
> - mutex_lock(&prime_fpriv->lock);
> -
> rb = prime_fpriv->handles.rb_node;
> while (rb) {
> struct drm_prime_member *member;
> @@ -210,8 +208,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
> rb = rb->rb_left;
> }
> }
> -
> - mutex_unlock(&prime_fpriv->lock);
> }
>
> void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 7fba37b94401..84c819c171d2 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -625,6 +625,15 @@ struct drm_gem_open {
> __u64 size;
> };
>
> +/* DRM_IOCTL_GEM_CHANGE_HANDLE ioctl argument type */
> +struct drm_gem_change_handle {
> + /** Current handle of object */
> + __u32 handle;
> +
> + /** Handle to change that object to */
> + __u32 new_handle;
> +};
> +
> /**
> * DRM_CAP_DUMB_BUFFER
> *
> @@ -1305,6 +1314,14 @@ extern "C" {
> */
> #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
>
> +/**
> + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
> + *
> + * Some applications (notably CRIU) need objects to have specific gem handles.
> + * This ioctl changes the object at one gem handle to use a new gem handle.
> + */
> +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
> +
> /*
> * Device specific ioctls should only be in their respective headers
> * The device specific ioctl range is from 0x40 to 0x9f.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects
2025-06-17 19:45 ` [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects David Francis
@ 2025-06-17 21:11 ` Felix Kuehling
0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2025-06-17 21:11 UTC (permalink / raw)
To: David Francis, dri-devel
Cc: tvrtko.ursulin, David.YatSin, Chris.Freehill, Christian.Koenig,
dcostantino, sruffell, simona, mripard, tzimmermann
On 2025-06-17 15:45, David Francis wrote:
> The kfd CRIU checkpoint ioctl would return an error if trying
> to checkpoint a process with no kfd buffer objects.
>
> This is a normal case and should not be an error.
>
> Signed-off-by: David Francis <David.Francis@amd.com>
This patch is
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index a8cf2d4580cc..9617c696d5b6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2569,8 +2569,8 @@ static int criu_restore(struct file *filep,
> pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
> args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
>
> - if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
> - !args->num_devices || !args->num_bos)
> + if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
> + !args->priv_data_size || !args->num_devices)
> return -EINVAL;
>
> mutex_lock(&p->mutex);
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl
2025-06-17 19:45 ` [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl David Francis
@ 2025-06-18 12:42 ` kernel test robot
0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2025-06-18 12:42 UTC (permalink / raw)
To: David Francis, dri-devel
Cc: llvm, oe-kbuild-all, tvrtko.ursulin, Felix.Kuehling, David.YatSin,
Chris.Freehill, Christian.Koenig, dcostantino, sruffell, simona,
mripard, tzimmermann, David Francis
Hi David,
kernel test robot noticed the following build errors:
[auto build test ERROR on drm-exynos/exynos-drm-next]
[also build test ERROR on linus/master v6.16-rc2]
[cannot apply to next-20250618]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/David-Francis/drm-amdgpu-Add-CRIU-ioctl-to-get-bo-info/20250618-044539
base: https://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git exynos-drm-next
patch link: https://lore.kernel.org/r/20250617194536.538681-4-David.Francis%40amd.com
patch subject: [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl
config: i386-buildonly-randconfig-005-20250618 (https://download.01.org/0day-ci/archive/20250618/202506182058.pHESGM1d-lkp@intel.com/config)
compiler: clang version 20.1.2 (https://github.com/llvm/llvm-project 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250618/202506182058.pHESGM1d-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506182058.pHESGM1d-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'invalids' in 'struct amdgpu_bo_va_mapping'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^ ~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:40: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~ ^
include/linux/list.h:770:30: note: expanded from macro 'list_for_each_entry'
770 | for (pos = list_first_entry(head, typeof(*pos), member); \
| ^~~~
include/linux/list.h:612:14: note: expanded from macro 'list_first_entry'
612 | list_entry((ptr)->next, type, member)
| ^~~
include/linux/list.h:601:15: note: expanded from macro 'list_entry'
601 | container_of(ptr, type, member)
| ^~~
include/linux/container_of.h:19:26: note: expanded from macro 'container_of'
19 | void *__mptr = (void *)(ptr); \
| ^~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'invalids' in 'struct amdgpu_bo_va_mapping'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^ ~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:40: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~ ^
include/linux/list.h:770:30: note: expanded from macro 'list_for_each_entry'
770 | for (pos = list_first_entry(head, typeof(*pos), member); \
| ^~~~
include/linux/list.h:612:14: note: expanded from macro 'list_first_entry'
612 | list_entry((ptr)->next, type, member)
| ^~~
note: (skipping 2 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
include/linux/compiler_types.h:503:63: note: expanded from macro '__same_type'
503 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ^~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'invalids' in 'struct amdgpu_bo_va_mapping'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^ ~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:40: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~ ^
include/linux/list.h:770:30: note: expanded from macro 'list_for_each_entry'
770 | for (pos = list_first_entry(head, typeof(*pos), member); \
| ^~~~
include/linux/list.h:612:14: note: expanded from macro 'list_first_entry'
612 | list_entry((ptr)->next, type, member)
| ^~~
note: (skipping 2 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
include/linux/compiler_types.h:503:63: note: expanded from macro '__same_type'
503 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ^~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:3: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ^ ~~~~
include/linux/list.h:770:13: note: expanded from macro 'list_for_each_entry'
770 | for (pos = list_first_entry(head, typeof(*pos), member); \
| ^ ~~~~~~
include/linux/list.h:612:2: note: expanded from macro 'list_first_entry'
612 | list_entry((ptr)->next, type, member)
| ^ ~~~~~~
include/linux/list.h:601:2: note: expanded from macro 'list_entry'
601 | container_of(ptr, type, member)
| ^ ~~~~~~
include/linux/container_of.h:23:21: note: expanded from macro 'container_of'
23 | ((type *)(__mptr - offsetof(type, member))); })
| ^ ~~~~~~
include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
16 | #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
| ^ ~~~~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'struct amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^ ~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:50: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~~~ ^
include/linux/list.h:771:38: note: expanded from macro 'list_for_each_entry'
771 | !list_entry_is_head(pos, head, member); \
| ~~~ ^
include/linux/list.h:761:21: note: expanded from macro 'list_entry_is_head'
761 | list_is_head(&pos->member, (head))
| ~~~ ^
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'invalids' in 'struct amdgpu_bo_va_mapping'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^ ~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:40: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~ ^
include/linux/list.h:771:32: note: expanded from macro 'list_for_each_entry'
771 | !list_entry_is_head(pos, head, member); \
| ^~~~
include/linux/list.h:761:30: note: expanded from macro 'list_entry_is_head'
761 | list_is_head(&pos->member, (head))
| ^~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'struct amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:50: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~
include/linux/list.h:772:34: note: expanded from macro 'list_for_each_entry'
772 | pos = list_next_entry(pos, member))
| ~~~~~~~~~~~~~~~~~~~~~^~~~~~~
include/linux/list.h:645:20: note: expanded from macro 'list_next_entry'
645 | list_entry((pos)->member.next, typeof(*(pos)), member)
| ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/list.h:601:15: note: expanded from macro 'list_entry'
601 | container_of(ptr, type, member)
| ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~
include/linux/container_of.h:19:26: note: expanded from macro 'container_of'
19 | void *__mptr = (void *)(ptr); \
| ^~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'struct amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:50: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~
include/linux/list.h:772:34: note: expanded from macro 'list_for_each_entry'
772 | pos = list_next_entry(pos, member))
| ~~~~~~~~~~~~~~~~~~~~~^~~~~~~
include/linux/list.h:645:20: note: expanded from macro 'list_next_entry'
645 | list_entry((pos)->member.next, typeof(*(pos)), member)
| ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
note: (skipping 2 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
include/linux/compiler_types.h:503:63: note: expanded from macro '__same_type'
503 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'struct amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:50: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~
include/linux/list.h:772:34: note: expanded from macro 'list_for_each_entry'
772 | pos = list_next_entry(pos, member))
| ~~~~~~~~~~~~~~~~~~~~~^~~~~~~
include/linux/list.h:645:20: note: expanded from macro 'list_next_entry'
645 | list_entry((pos)->member.next, typeof(*(pos)), member)
| ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
note: (skipping 2 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
include/linux/compiler_types.h:503:63: note: expanded from macro '__same_type'
503 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c:208:2: error: no member named 'list' in 'amdgpu_bo_va'
208 | amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h:674:3: note: expanded from macro 'amdgpu_vm_bo_va_for_each_invalid_mapping'
674 | list_for_each_entry(mapping, &bo_va->invalids, list)
| ^ ~~~~
include/linux/list.h:772:13: note: expanded from macro 'list_for_each_entry'
772 | pos = list_next_entry(pos, member))
| ^ ~~~~~~
include/linux/list.h:645:2: note: expanded from macro 'list_next_entry'
645 | list_entry((pos)->member.next, typeof(*(pos)), member)
| ^ ~~~~~~
include/linux/list.h:601:2: note: expanded from macro 'list_entry'
601 | container_of(ptr, type, member)
| ^ ~~~~~~
include/linux/container_of.h:23:21: note: expanded from macro 'container_of'
23 | ((type *)(__mptr - offsetof(type, member))); })
| ^ ~~~~~~
include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
16 | #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
| ^ ~~~~~~
10 errors generated.
vim +208 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
145
146 /**
147 * amdgpu_criu_mapping_info_ioctl - get information about a buffer's mappings
148 *
149 * @dev: drm device pointer
150 * @data: drm_amdgpu_criu_mapping_info_args
151 * @filp: drm file pointer
152 *
153 * num_mappings is set as an input to the size of the vm_buckets array.
154 * num_mappings is sent back as output as the number of mappings the bo has.
155 * If that number is larger than the size of the array, the ioctl must
156 * be retried.
157 *
158 * Returns:
159 * 0 for success, -errno for errors.
160 */
161 int amdgpu_criu_mapping_info_ioctl(struct drm_device *dev, void *data,
162 struct drm_file *filp)
163 {
164 struct drm_amdgpu_criu_mapping_info_args *args = data;
165 struct drm_gem_object *gobj = idr_find(&filp->object_idr, args->gem_handle);
166 struct amdgpu_vm *avm = &((struct amdgpu_fpriv *)filp->driver_priv)->vm;
167 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
168 struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(avm, bo);
169 struct amdgpu_fpriv *fpriv = filp->driver_priv;
170 struct drm_amdgpu_criu_vm_bucket *vm_buckets;
171 struct amdgpu_bo_va_mapping *mapping;
172 struct drm_exec exec;
173 int num_mappings = 0;
174 int ret;
175
176 vm_buckets = kvzalloc(args->num_mappings * sizeof(*vm_buckets), GFP_KERNEL);
177 if (!vm_buckets) {
178 ret = -ENOMEM;
179 goto free_vms;
180 }
181
182 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
183 DRM_EXEC_IGNORE_DUPLICATES, 0);
184 drm_exec_until_all_locked(&exec) {
185 if (gobj) {
186 ret = drm_exec_lock_obj(&exec, gobj);
187 drm_exec_retry_on_contention(&exec);
188 if (ret)
189 goto unlock_exec;
190 }
191
192 ret = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
193 drm_exec_retry_on_contention(&exec);
194 if (ret)
195 goto unlock_exec;
196 }
197
198 amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
199 if (num_mappings < args->num_mappings) {
200 vm_buckets[num_mappings].start = mapping->start;
201 vm_buckets[num_mappings].last = mapping->last;
202 vm_buckets[num_mappings].offset = mapping->offset;
203 vm_buckets[num_mappings].flags = hardware_flags_to_uapi_flags(drm_to_adev(dev), mapping->flags);
204 }
205 num_mappings += 1;
206 }
207
> 208 amdgpu_vm_bo_va_for_each_invalid_mapping(mapping, bo_va) {
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
2025-06-17 19:45 ` [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info David Francis
@ 2025-06-18 21:05 ` Felix Kuehling
2025-06-19 18:37 ` Francis, David
0 siblings, 1 reply; 11+ messages in thread
From: Felix Kuehling @ 2025-06-18 21:05 UTC (permalink / raw)
To: David Francis, dri-devel
Cc: tvrtko.ursulin, David.YatSin, Chris.Freehill, Christian.Koenig,
dcostantino, sruffell, simona, mripard, tzimmermann
On 2025-06-17 15:45, David Francis wrote:
> Add new ioctl DRM_IOCTL_AMDGPU_CRIU_BO_INFO.
>
> This ioctl returns a list of bos with their handles, sizes,
> and flags and domains.
>
> This ioctl is meant to be used during CRIU checkpoint and
> provide information needed to reconstruct the bos
> in CRIU restore.
>
> Signed-off-by: David Francis <David.Francis@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 144 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 32 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +
> include/uapi/drm/amdgpu_drm.h | 28 +++++
> 6 files changed, 209 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 87080c06e5fc..0863edcdd03f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -63,7 +63,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
> amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> - amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_criu.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
> amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> new file mode 100644
> index 000000000000..34a0358946b6
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> @@ -0,0 +1,144 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright 2025 Advanced Micro Devices, Inc.
> +*
> +* Permission is hereby granted, free of charge, to any person obtaining a
> +* copy of this software and associated documentation files (the "Software"),
> +* to deal in the Software without restriction, including without limitation
> +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +* and/or sell copies of the Software, and to permit persons to whom the
> +* Software is furnished to do so, subject to the following conditions:
> +*
> +* The above copyright notice and this permission notice shall be included in
> +* all copies or substantial portions of the Software.
> +*
> +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +* OTHER DEALINGS IN THE SOFTWARE.
> +*/
> +
> +#include <linux/dma-buf.h>
> +#include <linux/hashtable.h>
> +#include <linux/mutex.h>
> +#include <linux/random.h>
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_file.h>
> +
> +#include "amdgpu_criu.h"
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
> +#include <drm/drm_exec.h>
> +#include <drm/drm_gem_ttm_helper.h>
> +#include <drm/ttm/ttm_tt.h>
> +#include <linux/interval_tree_generic.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_display.h"
> +#include "amdgpu_dma_buf.h"
> +#include "amdgpu_hmm.h"
> +#include "amdgpu_xgmi.h"
That's a lot of header files. Do you really need them all?
> +
> +static uint32_t hardware_flags_to_uapi_flags(struct amdgpu_device *adev, uint64_t pte_flags)
This function is never called.
> +{
> + uint32_t gem_flags = 0;
> +
> + //This function will be replaced by the mapping flags rework
> +
> + if (pte_flags & AMDGPU_PTE_EXECUTABLE)
> + gem_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
> + if (pte_flags & AMDGPU_PTE_READABLE)
> + gem_flags |= AMDGPU_VM_PAGE_READABLE;
> + if (pte_flags & AMDGPU_PTE_WRITEABLE)
> + gem_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> + if (pte_flags & AMDGPU_PTE_PRT_FLAG(adev))
> + gem_flags |= AMDGPU_VM_PAGE_PRT;
> + if (pte_flags & AMDGPU_PTE_NOALLOC)
> + gem_flags |= AMDGPU_VM_PAGE_NOALLOC;
> +
> + return gem_flags;
> +}
> +
> +
> +/**
> + * amdgpu_criu_bo_info_ioctl - get information about a process' buffer objects
> + *
> + * @dev: drm device pointer
> + * @data: drm_amdgpu_criu_bo_info_args
> + * @filp: drm file pointer
> + *
> + * num_bos is set as an input to the size of the bo_buckets array.
> + * num_bos is sent back as output as the number of bos in the process.
> + * If that number is larger than the size of the array, the ioctl must
> + * be retried.
> + *
> + * Returns:
> + * 0 for success, -errno for errors.
> + */
> +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct drm_amdgpu_criu_bo_info_args *args = data;
> + struct drm_amdgpu_criu_bo_bucket *bo_buckets;
> + struct drm_gem_object *gobj;
> + int id, ret = 0;
> + int bo_index = 0;
> + int num_bos = 0;
> +
> + spin_lock(&filp->table_lock);
> + idr_for_each_entry(&filp->object_idr, gobj, id)
> + num_bos += 1;
> + spin_unlock(&filp->table_lock);
> +
> + if (args->num_bos < num_bos) {
> + args->num_bos = num_bos;
> + goto exit;
Since this path doesn't require any cleanup, you could just return. But maybe this should return an error code to let user mode know that it should retry with a bigger bucket allocation. -ENOSPC?
> + }
> + args->num_bos = num_bos;
> + if (num_bos == 0) {
> + goto exit;
Just return. 0 (success) seems fine here.
> + }
> +
> + bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
> + if (!bo_buckets) {
> + ret = -ENOMEM;
> + goto free_buckets;
Just return -ENOMEM.
> + }
> +
> + spin_lock(&filp->table_lock);
> + idr_for_each_entry(&filp->object_idr, gobj, id) {
> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> + struct drm_amdgpu_criu_bo_bucket *bo_bucket;
> +
> + bo_bucket = &bo_buckets[bo_index];
> +
> + bo_bucket->size = amdgpu_bo_size(bo);
> + bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
I'm not sure why you're removing this flag. I think we always set it implicitly when creating a new BO (and presumably during restore), but there should be no harm in leaving this flag set anyway.
> + bo_bucket->preferred_domains = bo->preferred_domains;
> + bo_bucket->gem_handle = id;
> +
> + if (bo->tbo.base.import_attach)
> + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_IS_IMPORT;
> +
> + bo_index += 1;
> + }
> + spin_unlock(&filp->table_lock);
> +
> + ret = copy_to_user((void __user *)args->bo_buckets, bo_buckets, num_bos * sizeof(*bo_buckets));
> + if (ret) {
> + pr_debug("Failed to copy BO information to user\n");
> + ret = -EFAULT;
> + }
> +
> +free_buckets:
> + kvfree(bo_buckets);
> +exit:
> +
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> new file mode 100644
> index 000000000000..1b18ffee6587
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright 2025 Advanced Micro Devices, Inc.
> +*
> +* Permission is hereby granted, free of charge, to any person obtaining a
> +* copy of this software and associated documentation files (the "Software"),
> +* to deal in the Software without restriction, including without limitation
> +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +* and/or sell copies of the Software, and to permit persons to whom the
> +* Software is furnished to do so, subject to the following conditions:
> +*
> +* The above copyright notice and this permission notice shall be included in
> +* all copies or substantial portions of the Software.
> +*
> +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +* OTHER DEALINGS IN THE SOFTWARE.
> +*/
> +
> +#ifndef __AMDGPU_CRIU_H__
> +#define __AMDGPU_CRIU_H__
> +
> +#include <drm/amdgpu_drm.h>
Why is this needed here? You're not using any uapi definitions below.
> +
> +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4db92e0a60da..bf33567bb166 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -53,6 +53,7 @@
> #include "amdgpu_xgmi.h"
> #include "amdgpu_userq.h"
> #include "amdgpu_userq_fence.h"
> +#include "amdgpu_criu.h"
> #include "../amdxcp/amdgpu_xcp_drv.h"
>
> /*
> @@ -3021,6 +3022,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_BO_INFO, amdgpu_criu_bo_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> };
>
> static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index a2149afa5803..a8cf2d4580cc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -45,6 +45,8 @@
> #include "amdgpu_dma_buf.h"
> #include "kfd_debug.h"
>
> +#include "amdgpu_criu.h"
> +
Why is this header file needed here? None of the new ioctls will be implemented in kfd_chardev.c.
Regards,
Felix
> static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> static int kfd_open(struct inode *, struct file *);
> static int kfd_release(struct inode *, struct file *);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 45c4fa13499c..1508c55ff92a 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -57,6 +57,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ 0x16
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> +#define DRM_AMDGPU_CRIU_BO_INFO 0x19
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -77,6 +78,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> +#define DRM_IOCTL_AMDGPU_CRIU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_BO_INFO, struct drm_amdgpu_criu_bo_info_args)
>
> /**
> * DOC: memory domains
> @@ -1626,6 +1628,32 @@ struct drm_color_ctm_3x4 {
> __u64 matrix[12];
> };
>
> +#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0)
> +
> +struct drm_amdgpu_criu_bo_info_args {
> + /* IN: Size of bo_buckets buffer. OUT: Number of bos in process (if larger than size of buffer, must retry) */
> + __u32 num_bos;
> +
> + /* User pointer to array of drm_amdgpu_criu_bo_bucket */
> + __u64 bo_buckets;
> +};
> +
> +struct drm_amdgpu_criu_bo_bucket {
> + /* Size of bo */
> + __u64 size;
> +
> + /* GEM_CREATE flags for re-creation of buffer */
> + __u64 alloc_flags;
> +
> + /* Pending how to handle this; provides information needed to remake the buffer on restore */
> + __u32 preferred_domains;
> +
> + /* Currently just one flag: IS_IMPORT */
> + __u32 flags;
> +
> + __u32 gem_handle;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
2025-06-18 21:05 ` Felix Kuehling
@ 2025-06-19 18:37 ` Francis, David
2025-06-19 19:35 ` Felix Kuehling
0 siblings, 1 reply; 11+ messages in thread
From: Francis, David @ 2025-06-19 18:37 UTC (permalink / raw)
To: Kuehling, Felix, dri-devel@lists.freedesktop.org
Cc: tvrtko.ursulin@igalia.com, Yat Sin, David, Freehill, Chris,
Koenig, Christian, dcostantino@meta.com, sruffell@meta.com,
simona@ffwll.ch, mripard@kernel.org, tzimmermann@suse.de
[-- Attachment #1: Type: text/plain, Size: 16159 bytes --]
[AMD Official Use Only - AMD Internal Distribution Only]
> > + spin_lock(&filp->table_lock);
> > + idr_for_each_entry(&filp->object_idr, gobj, id)
> > + num_bos += 1;
> > + spin_unlock(&filp->table_lock);
> > +
> > + if (args->num_bos < num_bos) {
> > + args->num_bos = num_bos;
> > + goto exit;
>
> Since this path doesn't require any cleanup, you could just return. But maybe this should return an error code to let user mode know that it should retry with a bigger bucket allocation. -ENOSPC?
>
I wasn't sure if sending back information from an ioctl in the structs while also returning an error value was acceptable. If it is, I'll make that change.
> > + idr_for_each_entry(&filp->object_idr, gobj, id) {
> > + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> > + struct drm_amdgpu_criu_bo_bucket *bo_bucket;
> > +
> > + bo_bucket = &bo_buckets[bo_index];
> > +
> > + bo_bucket->size = amdgpu_bo_size(bo);
> > + bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
>
> I'm not sure why you're removing this flag. I think we always set it implicitly when creating a new BO (and presumably during restore), but there should be no harm in leaving this flag set anyway.
>
The driver doesn't like this flag being set on requests to create bo. Since this is meant to be sending to the user the flags userspace will need to recreate the bo, I thought to leave it off.
>
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >
> > +#include "amdgpu_criu.h"
> > +
>
> Why is this header file needed here? None of the new ioctls will be implemented in kfd_chardev.c.
amdgpu_drv.c needs the header with the ioctl declarations for its ioctl definitions. Not sure if there's another way I should be including them.
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Wednesday, June 18, 2025 5:05 PM
To: Francis, David <David.Francis@amd.com>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>
Cc: tvrtko.ursulin@igalia.com <tvrtko.ursulin@igalia.com>; Yat Sin, David <David.YatSin@amd.com>; Freehill, Chris <Chris.Freehill@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; dcostantino@meta.com <dcostantino@meta.com>; sruffell@meta.com <sruffell@meta.com>; simona@ffwll.ch <simona@ffwll.ch>; mripard@kernel.org <mripard@kernel.org>; tzimmermann@suse.de <tzimmermann@suse.de>
Subject: Re: [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
On 2025-06-17 15:45, David Francis wrote:
> Add new ioctl DRM_IOCTL_AMDGPU_CRIU_BO_INFO.
>
> This ioctl returns a list of bos with their handles, sizes,
> and flags and domains.
>
> This ioctl is meant to be used during CRIU checkpoint and
> provide information needed to reconstruct the bos
> in CRIU restore.
>
> Signed-off-by: David Francis <David.Francis@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 144 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 32 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +
> include/uapi/drm/amdgpu_drm.h | 28 +++++
> 6 files changed, 209 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 87080c06e5fc..0863edcdd03f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -63,7 +63,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
> amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> - amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_criu.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
> amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> new file mode 100644
> index 000000000000..34a0358946b6
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> @@ -0,0 +1,144 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright 2025 Advanced Micro Devices, Inc.
> +*
> +* Permission is hereby granted, free of charge, to any person obtaining a
> +* copy of this software and associated documentation files (the "Software"),
> +* to deal in the Software without restriction, including without limitation
> +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +* and/or sell copies of the Software, and to permit persons to whom the
> +* Software is furnished to do so, subject to the following conditions:
> +*
> +* The above copyright notice and this permission notice shall be included in
> +* all copies or substantial portions of the Software.
> +*
> +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +* OTHER DEALINGS IN THE SOFTWARE.
> +*/
> +
> +#include <linux/dma-buf.h>
> +#include <linux/hashtable.h>
> +#include <linux/mutex.h>
> +#include <linux/random.h>
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_file.h>
> +
> +#include "amdgpu_criu.h"
> +
> +#include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
> +#include <drm/drm_exec.h>
> +#include <drm/drm_gem_ttm_helper.h>
> +#include <drm/ttm/ttm_tt.h>
> +#include <linux/interval_tree_generic.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_display.h"
> +#include "amdgpu_dma_buf.h"
> +#include "amdgpu_hmm.h"
> +#include "amdgpu_xgmi.h"
That's a lot of header files. Do you really need them all?
> +
> +static uint32_t hardware_flags_to_uapi_flags(struct amdgpu_device *adev, uint64_t pte_flags)
This function is never called.
> +{
> + uint32_t gem_flags = 0;
> +
> + //This function will be replaced by the mapping flags rework
> +
> + if (pte_flags & AMDGPU_PTE_EXECUTABLE)
> + gem_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
> + if (pte_flags & AMDGPU_PTE_READABLE)
> + gem_flags |= AMDGPU_VM_PAGE_READABLE;
> + if (pte_flags & AMDGPU_PTE_WRITEABLE)
> + gem_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> + if (pte_flags & AMDGPU_PTE_PRT_FLAG(adev))
> + gem_flags |= AMDGPU_VM_PAGE_PRT;
> + if (pte_flags & AMDGPU_PTE_NOALLOC)
> + gem_flags |= AMDGPU_VM_PAGE_NOALLOC;
> +
> + return gem_flags;
> +}
> +
> +
> +/**
> + * amdgpu_criu_bo_info_ioctl - get information about a process' buffer objects
> + *
> + * @dev: drm device pointer
> + * @data: drm_amdgpu_criu_bo_info_args
> + * @filp: drm file pointer
> + *
> + * num_bos is set as an input to the size of the bo_buckets array.
> + * num_bos is sent back as output as the number of bos in the process.
> + * If that number is larger than the size of the array, the ioctl must
> + * be retried.
> + *
> + * Returns:
> + * 0 for success, -errno for errors.
> + */
> +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct drm_amdgpu_criu_bo_info_args *args = data;
> + struct drm_amdgpu_criu_bo_bucket *bo_buckets;
> + struct drm_gem_object *gobj;
> + int id, ret = 0;
> + int bo_index = 0;
> + int num_bos = 0;
> +
> + spin_lock(&filp->table_lock);
> + idr_for_each_entry(&filp->object_idr, gobj, id)
> + num_bos += 1;
> + spin_unlock(&filp->table_lock);
> +
> + if (args->num_bos < num_bos) {
> + args->num_bos = num_bos;
> + goto exit;
Since this path doesn't require any cleanup, you could just return. But maybe this should return an error code to let user mode know that it should retry with a bigger bucket allocation. -ENOSPC?
> + }
> + args->num_bos = num_bos;
> + if (num_bos == 0) {
> + goto exit;
Just return. 0 (success) seems fine here.
> + }
> +
> + bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
> + if (!bo_buckets) {
> + ret = -ENOMEM;
> + goto free_buckets;
Just return -ENOMEM.
> + }
> +
> + spin_lock(&filp->table_lock);
> + idr_for_each_entry(&filp->object_idr, gobj, id) {
> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> + struct drm_amdgpu_criu_bo_bucket *bo_bucket;
> +
> + bo_bucket = &bo_buckets[bo_index];
> +
> + bo_bucket->size = amdgpu_bo_size(bo);
> + bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
I'm not sure why you're removing this flag. I think we always set it implicitly when creating a new BO (and presumably during restore), but there should be no harm in leaving this flag set anyway.
> + bo_bucket->preferred_domains = bo->preferred_domains;
> + bo_bucket->gem_handle = id;
> +
> + if (bo->tbo.base.import_attach)
> + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_IS_IMPORT;
> +
> + bo_index += 1;
> + }
> + spin_unlock(&filp->table_lock);
> +
> + ret = copy_to_user((void __user *)args->bo_buckets, bo_buckets, num_bos * sizeof(*bo_buckets));
> + if (ret) {
> + pr_debug("Failed to copy BO information to user\n");
> + ret = -EFAULT;
> + }
> +
> +free_buckets:
> + kvfree(bo_buckets);
> +exit:
> +
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> new file mode 100644
> index 000000000000..1b18ffee6587
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright 2025 Advanced Micro Devices, Inc.
> +*
> +* Permission is hereby granted, free of charge, to any person obtaining a
> +* copy of this software and associated documentation files (the "Software"),
> +* to deal in the Software without restriction, including without limitation
> +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +* and/or sell copies of the Software, and to permit persons to whom the
> +* Software is furnished to do so, subject to the following conditions:
> +*
> +* The above copyright notice and this permission notice shall be included in
> +* all copies or substantial portions of the Software.
> +*
> +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +* OTHER DEALINGS IN THE SOFTWARE.
> +*/
> +
> +#ifndef __AMDGPU_CRIU_H__
> +#define __AMDGPU_CRIU_H__
> +
> +#include <drm/amdgpu_drm.h>
Why is this needed here? You're not using any uapi definitions below.
> +
> +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4db92e0a60da..bf33567bb166 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -53,6 +53,7 @@
> #include "amdgpu_xgmi.h"
> #include "amdgpu_userq.h"
> #include "amdgpu_userq_fence.h"
> +#include "amdgpu_criu.h"
> #include "../amdxcp/amdgpu_xcp_drv.h"
>
> /*
> @@ -3021,6 +3022,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_BO_INFO, amdgpu_criu_bo_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> };
>
> static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index a2149afa5803..a8cf2d4580cc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -45,6 +45,8 @@
> #include "amdgpu_dma_buf.h"
> #include "kfd_debug.h"
>
> +#include "amdgpu_criu.h"
> +
Why is this header file needed here? None of the new ioctls will be implemented in kfd_chardev.c.
Regards,
Felix
> static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> static int kfd_open(struct inode *, struct file *);
> static int kfd_release(struct inode *, struct file *);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 45c4fa13499c..1508c55ff92a 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -57,6 +57,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ 0x16
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> +#define DRM_AMDGPU_CRIU_BO_INFO 0x19
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -77,6 +78,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> +#define DRM_IOCTL_AMDGPU_CRIU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_BO_INFO, struct drm_amdgpu_criu_bo_info_args)
>
> /**
> * DOC: memory domains
> @@ -1626,6 +1628,32 @@ struct drm_color_ctm_3x4 {
> __u64 matrix[12];
> };
>
> +#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0)
> +
> +struct drm_amdgpu_criu_bo_info_args {
> + /* IN: Size of bo_buckets buffer. OUT: Number of bos in process (if larger than size of buffer, must retry) */
> + __u32 num_bos;
> +
> + /* User pointer to array of drm_amdgpu_criu_bo_bucket */
> + __u64 bo_buckets;
> +};
> +
> +struct drm_amdgpu_criu_bo_bucket {
> + /* Size of bo */
> + __u64 size;
> +
> + /* GEM_CREATE flags for re-creation of buffer */
> + __u64 alloc_flags;
> +
> + /* Pending how to handle this; provides information needed to remake the buffer on restore */
> + __u32 preferred_domains;
> +
> + /* Currently just one flag: IS_IMPORT */
> + __u32 flags;
> +
> + __u32 gem_handle;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
[-- Attachment #2: Type: text/html, Size: 30689 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
2025-06-19 18:37 ` Francis, David
@ 2025-06-19 19:35 ` Felix Kuehling
0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2025-06-19 19:35 UTC (permalink / raw)
To: Francis, David, dri-devel@lists.freedesktop.org
Cc: tvrtko.ursulin@igalia.com, Yat Sin, David, Freehill, Chris,
Koenig, Christian, dcostantino@meta.com, sruffell@meta.com,
simona@ffwll.ch, mripard@kernel.org, tzimmermann@suse.de
On 2025-06-19 14:37, Francis, David wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
>
> > > + spin_lock(&filp->table_lock);
> > > + idr_for_each_entry(&filp->object_idr, gobj, id)
> > > + num_bos += 1;
> > > + spin_unlock(&filp->table_lock);
> > > +
> > > + if (args->num_bos < num_bos) {
> > > + args->num_bos = num_bos;
> > > + goto exit;
> >
> > Since this path doesn't require any cleanup, you could just return. But maybe this should return an error code to let user mode know that it should retry with a bigger bucket allocation. -ENOSPC?
> >
>
> I wasn't sure if sending back information from an ioctl in the structs while also returning an error value was acceptable. If it is, I'll make that change.
>
> > > + idr_for_each_entry(&filp->object_idr, gobj, id) {
> > > + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> > > + struct drm_amdgpu_criu_bo_bucket *bo_bucket;
> > > +
> > > + bo_bucket = &bo_buckets[bo_index];
> > > +
> > > + bo_bucket->size = amdgpu_bo_size(bo);
> > > + bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
> >
> > I'm not sure why you're removing this flag. I think we always set it implicitly when creating a new BO (and presumably during restore), but there should be no harm in leaving this flag set anyway.
> >
>
> The driver doesn't like this flag being set on requests to create bo. Since this is meant to be sending to the user the flags userspace will need to recreate the bo, I thought to leave it off.
OK.
>
> >
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > >
> > > +#include "amdgpu_criu.h"
> > > +
> >
> > Why is this header file needed here? None of the new ioctls will be implemented in kfd_chardev.c.
>
> amdgpu_drv.c needs the header with the ioctl declarations for its ioctl definitions. Not sure if there's another way I should be including them.
Sorry. I put my comment in the wrong place. Yes, you need this header in amdgpu_drv.c. You were also including it in kfd_chardev.c. I don't think you need it there.
Regards,
Felix
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> *From:* Kuehling, Felix <Felix.Kuehling@amd.com>
> *Sent:* Wednesday, June 18, 2025 5:05 PM
> *To:* Francis, David <David.Francis@amd.com>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>
> *Cc:* tvrtko.ursulin@igalia.com <tvrtko.ursulin@igalia.com>; Yat Sin, David <David.YatSin@amd.com>; Freehill, Chris <Chris.Freehill@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; dcostantino@meta.com <dcostantino@meta.com>; sruffell@meta.com <sruffell@meta.com>; simona@ffwll.ch <simona@ffwll.ch>; mripard@kernel.org <mripard@kernel.org>; tzimmermann@suse.de <tzimmermann@suse.de>
> *Subject:* Re: [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info
>
>
> On 2025-06-17 15:45, David Francis wrote:
> > Add new ioctl DRM_IOCTL_AMDGPU_CRIU_BO_INFO.
> >
> > This ioctl returns a list of bos with their handles, sizes,
> > and flags and domains.
> >
> > This ioctl is meant to be used during CRIU checkpoint and
> > provide information needed to reconstruct the bos
> > in CRIU restore.
> >
> > Signed-off-by: David Francis <David.Francis@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c | 144 +++++++++++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h | 32 +++++
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
> > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +
> > include/uapi/drm/amdgpu_drm.h | 28 +++++
> > 6 files changed, 209 insertions(+), 1 deletion(-)
> > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> > index 87080c06e5fc..0863edcdd03f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> > @@ -63,7 +63,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> > amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> > amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
> > amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> > - amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> > + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_criu.o \
> > amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> > amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
> > amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> > new file mode 100644
> > index 000000000000..34a0358946b6
> > --- /dev/null
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.c
> > @@ -0,0 +1,144 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright 2025 Advanced Micro Devices, Inc.
> > +*
> > +* Permission is hereby granted, free of charge, to any person obtaining a
> > +* copy of this software and associated documentation files (the "Software"),
> > +* to deal in the Software without restriction, including without limitation
> > +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > +* and/or sell copies of the Software, and to permit persons to whom the
> > +* Software is furnished to do so, subject to the following conditions:
> > +*
> > +* The above copyright notice and this permission notice shall be included in
> > +* all copies or substantial portions of the Software.
> > +*
> > +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> > +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > +* OTHER DEALINGS IN THE SOFTWARE.
> > +*/
> > +
> > +#include <linux/dma-buf.h>
> > +#include <linux/hashtable.h>
> > +#include <linux/mutex.h>
> > +#include <linux/random.h>
> > +
> > +#include <drm/amdgpu_drm.h>
> > +#include <drm/drm_device.h>
> > +#include <drm/drm_file.h>
> > +
> > +#include "amdgpu_criu.h"
> > +
> > +#include <drm/amdgpu_drm.h>
> > +#include <drm/drm_drv.h>
> > +#include <drm/drm_exec.h>
> > +#include <drm/drm_gem_ttm_helper.h>
> > +#include <drm/ttm/ttm_tt.h>
> > +#include <linux/interval_tree_generic.h>
> > +
> > +#include "amdgpu.h"
> > +#include "amdgpu_display.h"
> > +#include "amdgpu_dma_buf.h"
> > +#include "amdgpu_hmm.h"
> > +#include "amdgpu_xgmi.h"
>
> That's a lot of header files. Do you really need them all?
>
>
> > +
> > +static uint32_t hardware_flags_to_uapi_flags(struct amdgpu_device *adev, uint64_t pte_flags)
>
> This function is never called.
>
>
> > +{
> > + uint32_t gem_flags = 0;
> > +
> > + //This function will be replaced by the mapping flags rework
> > +
> > + if (pte_flags & AMDGPU_PTE_EXECUTABLE)
> > + gem_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
> > + if (pte_flags & AMDGPU_PTE_READABLE)
> > + gem_flags |= AMDGPU_VM_PAGE_READABLE;
> > + if (pte_flags & AMDGPU_PTE_WRITEABLE)
> > + gem_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> > + if (pte_flags & AMDGPU_PTE_PRT_FLAG(adev))
> > + gem_flags |= AMDGPU_VM_PAGE_PRT;
> > + if (pte_flags & AMDGPU_PTE_NOALLOC)
> > + gem_flags |= AMDGPU_VM_PAGE_NOALLOC;
> > +
> > + return gem_flags;
> > +}
> > +
> > +
> > +/**
> > + * amdgpu_criu_bo_info_ioctl - get information about a process' buffer objects
> > + *
> > + * @dev: drm device pointer
> > + * @data: drm_amdgpu_criu_bo_info_args
> > + * @filp: drm file pointer
> > + *
> > + * num_bos is set as an input to the size of the bo_buckets array.
> > + * num_bos is sent back as output as the number of bos in the process.
> > + * If that number is larger than the size of the array, the ioctl must
> > + * be retried.
> > + *
> > + * Returns:
> > + * 0 for success, -errno for errors.
> > + */
> > +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *filp)
> > +{
> > + struct drm_amdgpu_criu_bo_info_args *args = data;
> > + struct drm_amdgpu_criu_bo_bucket *bo_buckets;
> > + struct drm_gem_object *gobj;
> > + int id, ret = 0;
> > + int bo_index = 0;
> > + int num_bos = 0;
> > +
> > + spin_lock(&filp->table_lock);
> > + idr_for_each_entry(&filp->object_idr, gobj, id)
> > + num_bos += 1;
> > + spin_unlock(&filp->table_lock);
> > +
> > + if (args->num_bos < num_bos) {
> > + args->num_bos = num_bos;
> > + goto exit;
>
> Since this path doesn't require any cleanup, you could just return. But maybe this should return an error code to let user mode know that it should retry with a bigger bucket allocation. -ENOSPC?
>
>
> > + }
> > + args->num_bos = num_bos;
> > + if (num_bos == 0) {
> > + goto exit;
>
> Just return. 0 (success) seems fine here.
>
>
> > + }
> > +
> > + bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
> > + if (!bo_buckets) {
> > + ret = -ENOMEM;
> > + goto free_buckets;
>
> Just return -ENOMEM.
>
>
> > + }
> > +
> > + spin_lock(&filp->table_lock);
> > + idr_for_each_entry(&filp->object_idr, gobj, id) {
> > + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> > + struct drm_amdgpu_criu_bo_bucket *bo_bucket;
> > +
> > + bo_bucket = &bo_buckets[bo_index];
> > +
> > + bo_bucket->size = amdgpu_bo_size(bo);
> > + bo_bucket->alloc_flags = bo->flags & (~AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE);
>
> I'm not sure why you're removing this flag. I think we always set it implicitly when creating a new BO (and presumably during restore), but there should be no harm in leaving this flag set anyway.
>
>
> > + bo_bucket->preferred_domains = bo->preferred_domains;
> > + bo_bucket->gem_handle = id;
> > +
> > + if (bo->tbo.base.import_attach)
> > + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_IS_IMPORT;
> > +
> > + bo_index += 1;
> > + }
> > + spin_unlock(&filp->table_lock);
> > +
> > + ret = copy_to_user((void __user *)args->bo_buckets, bo_buckets, num_bos * sizeof(*bo_buckets));
> > + if (ret) {
> > + pr_debug("Failed to copy BO information to user\n");
> > + ret = -EFAULT;
> > + }
> > +
> > +free_buckets:
> > + kvfree(bo_buckets);
> > +exit:
> > +
> > + return ret;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> > new file mode 100644
> > index 000000000000..1b18ffee6587
> > --- /dev/null
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_criu.h
> > @@ -0,0 +1,32 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright 2025 Advanced Micro Devices, Inc.
> > +*
> > +* Permission is hereby granted, free of charge, to any person obtaining a
> > +* copy of this software and associated documentation files (the "Software"),
> > +* to deal in the Software without restriction, including without limitation
> > +* the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > +* and/or sell copies of the Software, and to permit persons to whom the
> > +* Software is furnished to do so, subject to the following conditions:
> > +*
> > +* The above copyright notice and this permission notice shall be included in
> > +* all copies or substantial portions of the Software.
> > +*
> > +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> > +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > +* OTHER DEALINGS IN THE SOFTWARE.
> > +*/
> > +
> > +#ifndef __AMDGPU_CRIU_H__
> > +#define __AMDGPU_CRIU_H__
> > +
> > +#include <drm/amdgpu_drm.h>
>
> Why is this needed here? You're not using any uapi definitions below.
>
>
> > +
> > +int amdgpu_criu_bo_info_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *filp);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 4db92e0a60da..bf33567bb166 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -53,6 +53,7 @@
> > #include "amdgpu_xgmi.h"
> > #include "amdgpu_userq.h"
> > #include "amdgpu_userq_fence.h"
> > +#include "amdgpu_criu.h"
> > #include "../amdxcp/amdgpu_xcp_drv.h"
> >
> > /*
> > @@ -3021,6 +3022,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(AMDGPU_CRIU_BO_INFO, amdgpu_criu_bo_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > };
> >
> > static const struct drm_driver amdgpu_kms_driver = {
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index a2149afa5803..a8cf2d4580cc 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -45,6 +45,8 @@
> > #include "amdgpu_dma_buf.h"
> > #include "kfd_debug.h"
> >
> > +#include "amdgpu_criu.h"
> > +
>
> Why is this header file needed here? None of the new ioctls will be implemented in kfd_chardev.c.
>
> Regards,
> Felix
>
>
> > static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> > static int kfd_open(struct inode *, struct file *);
> > static int kfd_release(struct inode *, struct file *);
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 45c4fa13499c..1508c55ff92a 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -57,6 +57,7 @@ extern "C" {
> > #define DRM_AMDGPU_USERQ 0x16
> > #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> > #define DRM_AMDGPU_USERQ_WAIT 0x18
> > +#define DRM_AMDGPU_CRIU_BO_INFO 0x19
> >
> > #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -77,6 +78,7 @@ extern "C" {
> > #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> > #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> > #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> > +#define DRM_IOCTL_AMDGPU_CRIU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_BO_INFO, struct drm_amdgpu_criu_bo_info_args)
> >
> > /**
> > * DOC: memory domains
> > @@ -1626,6 +1628,32 @@ struct drm_color_ctm_3x4 {
> > __u64 matrix[12];
> > };
> >
> > +#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0)
> > +
> > +struct drm_amdgpu_criu_bo_info_args {
> > + /* IN: Size of bo_buckets buffer. OUT: Number of bos in process (if larger than size of buffer, must retry) */
> > + __u32 num_bos;
> > +
> > + /* User pointer to array of drm_amdgpu_criu_bo_bucket */
> > + __u64 bo_buckets;
> > +};
> > +
> > +struct drm_amdgpu_criu_bo_bucket {
> > + /* Size of bo */
> > + __u64 size;
> > +
> > + /* GEM_CREATE flags for re-creation of buffer */
> > + __u64 alloc_flags;
> > +
> > + /* Pending how to handle this; provides information needed to remake the buffer on restore */
> > + __u32 preferred_domains;
> > +
> > + /* Currently just one flag: IS_IMPORT */
> > + __u32 flags;
> > +
> > + __u32 gem_handle;
> > +};
> > +
> > #if defined(__cplusplus)
> > }
> > #endif
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2025-06-19 19:35 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-17 19:45 [PATCH v7] Add CRIU support for amdgpu dmabuf David Francis
2025-06-17 19:45 ` [PATCH 1/4] drm: Add DRM prime interfaces to reassign GEM handle David Francis
2025-06-17 20:29 ` Felix Kuehling
2025-06-17 19:45 ` [PATCH 2/4] drm/amdgpu: Add CRIU ioctl to get bo info David Francis
2025-06-18 21:05 ` Felix Kuehling
2025-06-19 18:37 ` Francis, David
2025-06-19 19:35 ` Felix Kuehling
2025-06-17 19:45 ` [PATCH 3/4] drm/amdgpu: Add CRIU mapping info ioctl David Francis
2025-06-18 12:42 ` kernel test robot
2025-06-17 19:45 ` [PATCH 4/4] drm/amdgpu: Allow kfd CRIU with no buffer objects David Francis
2025-06-17 21:11 ` Felix Kuehling
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).