* [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:15 ` Christian König
2026-04-20 12:07 ` [RFC/POC PATCH 02/12] drm/amdgpu: add SVM data structures and header Honglei Huang
` (10 subsequent siblings)
11 siblings, 1 reply; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Add amdgpu drm SVM API definitions built on the
DRM GPUSVM framework.
This includes:
- DRM_AMDGPU_GEM_SVM ioctl
- AMDGPU_SVM_FLAG_* flags
- AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
- AMDGPU_SVM_ATTR_* attribute types
- AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
- struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 406a42be4..bed71ed9b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
+#define DRM_AMDGPU_GEM_SVM 0x1a
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
+#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
/**
* DOC: memory domains
@@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
__u64 matrix[12];
};
+#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
+#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
+#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
+#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
+#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
+#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
+#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
+#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
+
+#define AMDGPU_SVM_OP_SET_ATTR 0
+#define AMDGPU_SVM_OP_GET_ATTR 1
+
+#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
+#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
+#define AMDGPU_SVM_ATTR_ACCESS 2
+#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
+#define AMDGPU_SVM_ATTR_NO_ACCESS 4
+#define AMDGPU_SVM_ATTR_SET_FLAGS 5
+#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
+#define AMDGPU_SVM_ATTR_GRANULARITY 7
+
+#define AMDGPU_SVM_LOCATION_SYSMEM 0
+#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
+
+struct drm_amdgpu_svm_attribute {
+ __u32 type;
+ __u32 value;
+};
+
+struct drm_amdgpu_gem_svm {
+ __u64 start_addr;
+ __u64 size;
+ __u32 operation;
+ __u32 nattr;
+ __u64 attrs_ptr;
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 12:07 ` [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions Honglei Huang
@ 2026-04-20 12:15 ` Christian König
2026-04-20 13:30 ` Huang, Honglei1
2026-04-21 3:37 ` Kuehling, Felix
0 siblings, 2 replies; 37+ messages in thread
From: Christian König @ 2026-04-20 12:15 UTC (permalink / raw)
To: Honglei Huang, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
On 4/20/26 14:07, Honglei Huang wrote:
> From: Honglei Huang <honghuan@amd.com>
>
> Add amdgpu drm SVM API definitions built on the
> DRM GPUSVM framework.
>
> This includes:
> - DRM_AMDGPU_GEM_SVM ioctl
> - AMDGPU_SVM_FLAG_* flags
> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
> - AMDGPU_SVM_ATTR_* attribute types
> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>
> Signed-off-by: Honglei Huang <honghuan@amd.com>
> ---
> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 406a42be4..bed71ed9b 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -58,6 +58,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> +#define DRM_AMDGPU_GEM_SVM 0x1a
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -79,6 +80,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>
> /**
> * DOC: memory domains
> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
> __u64 matrix[12];
> };
>
> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
> +
> +#define AMDGPU_SVM_OP_SET_ATTR 0
> +#define AMDGPU_SVM_OP_GET_ATTR 1
> +
> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
> +#define AMDGPU_SVM_ATTR_ACCESS 2
> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
Why are those separate attributes? What is the difference between those?
> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
Why is that separated into set and clear flags?
> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
> +
> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
No location for device local memory?
> +
> +struct drm_amdgpu_svm_attribute {
> + __u32 type;
> + __u32 value;
> +};
> +
> +struct drm_amdgpu_gem_svm {
> + __u64 start_addr;
> + __u64 size;
> + __u32 operation;
> + __u32 nattr;
> + __u64 attrs_ptr;
> +};
Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
And we usually use unions in this header to separate the input from the output parameters.
Regards,
Christian.
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 12:15 ` Christian König
@ 2026-04-20 13:30 ` Huang, Honglei1
2026-04-20 15:37 ` Christian König
2026-04-21 3:37 ` Kuehling, Felix
1 sibling, 1 reply; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-20 13:30 UTC (permalink / raw)
To: Christian König
Cc: Alexander.Deucher, Felix.Kuehling, Oak.Zeng, Jenny-Jing.Liu,
Philip.Yang, Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/20/2026 8:15 PM, Christian König wrote:
>
>
> On 4/20/26 14:07, Honglei Huang wrote:
>> From: Honglei Huang <honghuan@amd.com>
>>
>> Add amdgpu drm SVM API definitions built on the
>> DRM GPUSVM framework.
>>
>> This includes:
>> - DRM_AMDGPU_GEM_SVM ioctl
>> - AMDGPU_SVM_FLAG_* flags
>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>> - AMDGPU_SVM_ATTR_* attribute types
>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>
>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>> ---
>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>> 1 file changed, 39 insertions(+)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 406a42be4..bed71ed9b 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -58,6 +58,7 @@ extern "C" {
>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>
>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>> @@ -79,6 +80,7 @@ extern "C" {
>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>
>> /**
>> * DOC: memory domains
>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>> __u64 matrix[12];
>> };
>>
>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>> +
>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>> +
>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>
> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>
>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>
> Why are those separate attributes? What is the difference between those?
Thanks a lot for the comments. I had some content mistaken in V2, so I
posted V3 to fix that. For the header they are the same; for the other
content please review V3, sorry about that. I will address the concerns
you raised in the next version.
So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS
is clear: the GPU either can access the range or it cannot. SVM can set
the preferred location, which can be in VRAM or in system memory. With
AMDGPU_SVM_ATTR_ACCESS the range can be migrated between RAM and VRAM;
with AMDGPU_SVM_ATTR_ACCESS_IN_PLACE it cannot migrate, so the GPU can
only access it in its initial location.
>
>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>
> Why is that separated into set and clear flags?
This method inherits from KFD and is also designed to be compatible with
upper layer applications such as ROCR.
>
>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>> +
>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>
> No location for device local memory?
Value > 0 means device memory; in xe_svm, it seems an fd is used
for device local memory.
>
>> +
>> +struct drm_amdgpu_svm_attribute {
>> + __u32 type;
>> + __u32 value;
>> +};
>> +
>> +struct drm_amdgpu_gem_svm {
>> + __u64 start_addr;
>> + __u64 size;
>> + __u32 operation;
>> + __u32 nattr;
>> + __u64 attrs_ptr;
>> +};
>
> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>
> And we usually use unions in this header to separate the input from the output parameters.
Got it, I will add documentation for it and will use unions in the next
version. Thanks a lot for the comments.
Regards,
Honglei
>
> Regards,
> Christian.
>
>> +
>> #if defined(__cplusplus)
>> }
>> #endif
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 13:30 ` Huang, Honglei1
@ 2026-04-20 15:37 ` Christian König
2026-04-20 16:06 ` Matthew Brost
2026-04-23 6:21 ` Huang, Honglei1
0 siblings, 2 replies; 37+ messages in thread
From: Christian König @ 2026-04-20 15:37 UTC (permalink / raw)
To: Huang, Honglei1
Cc: Alexander.Deucher, Felix.Kuehling, Oak.Zeng, Jenny-Jing.Liu,
Philip.Yang, Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/20/26 15:30, Huang, Honglei1 wrote:
> On 4/20/2026 8:15 PM, Christian König wrote:
>>
>>
>> On 4/20/26 14:07, Honglei Huang wrote:
>>> From: Honglei Huang <honghuan@amd.com>
>>>
>>> Add amdgpu drm SVM API definitions built on the
>>> DRM GPUSVM framework.
>>>
>>> This includes:
>>> - DRM_AMDGPU_GEM_SVM ioctl
>>> - AMDGPU_SVM_FLAG_* flags
>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>> - AMDGPU_SVM_ATTR_* attribute types
>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>
>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>> ---
>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>> 1 file changed, 39 insertions(+)
>>>
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 406a42be4..bed71ed9b 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -58,6 +58,7 @@ extern "C" {
>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>> @@ -79,6 +80,7 @@ extern "C" {
>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>> /**
>>> * DOC: memory domains
>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>> __u64 matrix[12];
>>> };
>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>> +
>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>> +
>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>
>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>
>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>
>> Why are those separate attributes? What is the difference between those?
>
> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>
> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
> it can not migrate, GPU only can access it in the initial place.
Yeah, but then the interface doesn't seem to make sense, since such states are mutually exclusive.
It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>
>> Why is that separated into set and clear flags?
>
> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
As far as I can see just a SET_FLAGS should be sufficient.
>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>> +
>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>
>> No location for device local memory?
>
> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
Absolute clear NAK for that approach. This interface is per FD!
We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
We also need to make sure that setting attributes for different devices doesn't affect each other.
Regards,
Christian.
>
>>
>>> +
>>> +struct drm_amdgpu_svm_attribute {
>>> + __u32 type;
>>> + __u32 value;
>>> +};
>>> +
>>> +struct drm_amdgpu_gem_svm {
>>> + __u64 start_addr;
>>> + __u64 size;
>>> + __u32 operation;
>>> + __u32 nattr;
>>> + __u64 attrs_ptr;
>>> +};
>>
>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>
>> And we usually use unions in this header to separate the input from the output parameters.
>
> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>
> Regards,
> Honglei
>
>>
>> Regards,
>> Christian.
>>
>>> +
>>> #if defined(__cplusplus)
>>> }
>>> #endif
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 15:37 ` Christian König
@ 2026-04-20 16:06 ` Matthew Brost
2026-04-20 16:28 ` Thomas Hellström
` (2 more replies)
2026-04-23 6:21 ` Huang, Honglei1
1 sibling, 3 replies; 37+ messages in thread
From: Matthew Brost @ 2026-04-20 16:06 UTC (permalink / raw)
To: Christian König
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
> On 4/20/26 15:30, Huang, Honglei1 wrote:
> > On 4/20/2026 8:15 PM, Christian König wrote:
> >>
> >>
> >> On 4/20/26 14:07, Honglei Huang wrote:
> >>> From: Honglei Huang <honghuan@amd.com>
> >>>
> >>> Add amdgpu drm SVM API definitions built on the
> >>> DRM GPUSVM framework.
> >>>
> >>> This includes:
> >>> - DRM_AMDGPU_GEM_SVM ioctl
> >>> - AMDGPU_SVM_FLAG_* flags
> >>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
> >>> - AMDGPU_SVM_ATTR_* attribute types
> >>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
> >>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
> >>>
> >>> Signed-off-by: Honglei Huang <honghuan@amd.com>
> >>> ---
> >>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
> >>> 1 file changed, 39 insertions(+)
> >>>
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >>> index 406a42be4..bed71ed9b 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -58,6 +58,7 @@ extern "C" {
> >>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> >>> #define DRM_AMDGPU_USERQ_WAIT 0x18
> >>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> >>> +#define DRM_AMDGPU_GEM_SVM 0x1a
> >>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>> @@ -79,6 +80,7 @@ extern "C" {
> >>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> >>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> >>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> >>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
> >>> /**
> >>> * DOC: memory domains
> >>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
> >>> __u64 matrix[12];
> >>> };
> >>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
> >>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
> >>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
> >>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
> >>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
> >>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
> >>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
> >>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
> >>> +
> >>> +#define AMDGPU_SVM_OP_SET_ATTR 0
> >>> +#define AMDGPU_SVM_OP_GET_ATTR 1
> >>> +
> >>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
> >>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
> >>
> >> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
> >>
> >>> +#define AMDGPU_SVM_ATTR_ACCESS 2
> >>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
> >>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
> >>
> >> Why are those separate attributes? What is the difference between those?
> >
> > Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
> >
> > So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
> > it can not migrate, GPU only can access it in the initial place.
>
> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>
> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>
> >>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
> >>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
> >>
> >> Why is that separated into set and clear flags?
> >
> > This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>
> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>
> As far as I can see just a SET_FLAGS should be sufficient.
>
> >>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
> >>> +
> >>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
> >>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
> >>
> >> No location for device local memory?
> >
> > Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
I have no stake in AMD’s uAPI, but I can at least explain how Xe’s uAPI
works here—and admittedly, it’s somewhat goofy.
0 == device-local memory, with first-touch placement on whichever
device/tile touches the memory first
-1 == system memory
≥ 0 == a render-node FD (which could refer to a local or remote device),
paired with a region instance to extract the pgmap for the desired
placement
I believe the reason this isn’t fully FD-based is that the compute UMD
team wasn’t keen on exporting every pgmap as an FD, though that was
something that had been considered.
>
> Absolute clear NAK for that approach. This interface is per FD!
>
> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>
> We also need to make sure that setting attributes for different devices doesn't affect each other.
We landed on the conclusion that it is undefined behavior if different
render FDs—or more specifically, VMs across devices within the same SVM
address space—set different madvise attributes. I believe this was at
Sima's suggestion.
From the UMD point of view, every madvise call therefore becomes:
for_each_fd_vm
set_madvise_attributes
This choice was made to keep madvise attributes local to the per-device
VM structure, rather than introducing some form of cross-device shared
storage.
A misbehaving user can absolutely shoot themselves in the foot, but at
worst this only ends up corrupting behavior within their own process
shared across devices.
Matt
>
> Regards,
> Christian.
>
> >
> >>
> >>> +
> >>> +struct drm_amdgpu_svm_attribute {
> >>> + __u32 type;
> >>> + __u32 value;
> >>> +};
> >>> +
> >>> +struct drm_amdgpu_gem_svm {
> >>> + __u64 start_addr;
> >>> + __u64 size;
> >>> + __u32 operation;
> >>> + __u32 nattr;
> >>> + __u64 attrs_ptr;
> >>> +};
> >>
> >> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
> >>
> >> And we usually use unions in this header to separate the input from the output parameters.
> >
> > Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
> >
> > Regards,
> > Honglei
> >
> >>
> >> Regards,
> >> Christian.
> >>
> >>> +
> >>> #if defined(__cplusplus)
> >>> }
> >>> #endif
> >>
> >
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 16:06 ` Matthew Brost
@ 2026-04-20 16:28 ` Thomas Hellström
2026-04-20 18:07 ` Christian König
2026-04-21 9:52 ` Huang, Honglei1
2 siblings, 0 replies; 37+ messages in thread
From: Thomas Hellström @ 2026-04-20 16:28 UTC (permalink / raw)
To: Matthew Brost, Christian König
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, dakr, aliceryhl, amd-gfx,
dri-devel
On Mon, 2026-04-20 at 09:06 -0700, Matthew Brost wrote:
> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
> > On 4/20/26 15:30, Huang, Honglei1 wrote:
> > > On 4/20/2026 8:15 PM, Christian König wrote:
> > > >
> > > >
> > > > On 4/20/26 14:07, Honglei Huang wrote:
> > > > > From: Honglei Huang <honghuan@amd.com>
> > > > >
> > > > > Add amdgpu drm SVM API definitions built on the
> > > > > DRM GPUSVM framework.
> > > > >
> > > > > This includes:
> > > > > - DRM_AMDGPU_GEM_SVM ioctl
> > > > > - AMDGPU_SVM_FLAG_* flags
> > > > > - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
> > > > > - AMDGPU_SVM_ATTR_* attribute types
> > > > > - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
> > > > > - struct drm_amdgpu_svm_attribute and struct
> > > > > drm_amdgpu_gem_svm
> > > > >
> > > > > Signed-off-by: Honglei Huang <honghuan@amd.com>
> > > > > ---
> > > > > include/uapi/drm/amdgpu_drm.h | 39
> > > > > +++++++++++++++++++++++++++++++++++
> > > > > 1 file changed, 39 insertions(+)
> > > > >
> > > > > diff --git a/include/uapi/drm/amdgpu_drm.h
> > > > > b/include/uapi/drm/amdgpu_drm.h
> > > > > index 406a42be4..bed71ed9b 100644
> > > > > --- a/include/uapi/drm/amdgpu_drm.h
> > > > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > > > @@ -58,6 +58,7 @@ extern "C" {
> > > > > #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> > > > > #define DRM_AMDGPU_USERQ_WAIT 0x18
> > > > > #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> > > > > +#define DRM_AMDGPU_GEM_SVM 0x1a
> > > > > #define DRM_IOCTL_AMDGPU_GEM_CREATE
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
> > > > > drm_amdgpu_gem_create)
> > > > > #define DRM_IOCTL_AMDGPU_GEM_MMAP
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union
> > > > > drm_amdgpu_gem_mmap)
> > > > > @@ -79,6 +80,7 @@ extern "C" {
> > > > > #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct
> > > > > drm_amdgpu_userq_signal)
> > > > > #define DRM_IOCTL_AMDGPU_USERQ_WAIT
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct
> > > > > drm_amdgpu_userq_wait)
> > > > > #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES,
> > > > > struct drm_amdgpu_gem_list_handles)
> > > > > +#define DRM_IOCTL_AMDGPU_GEM_SVM
> > > > > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct
> > > > > drm_amdgpu_gem_svm)
> > > > > /**
> > > > > * DOC: memory domains
> > > > > @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
> > > > > __u64 matrix[12];
> > > > > };
> > > > > +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
> > > > > +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
> > > > > +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
> > > > > +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
> > > > > +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
> > > > > +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
> > > > > +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
> > > > > +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
> > > > > +
> > > > > +#define AMDGPU_SVM_OP_SET_ATTR 0
> > > > > +#define AMDGPU_SVM_OP_GET_ATTR 1
> > > > > +
> > > > > +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
> > > > > +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
> > > >
> > > > Up till here the interface makes perfect sense, but then it
> > > > becomes a bit fuzzy.
> > > >
> > > > > +#define AMDGPU_SVM_ATTR_ACCESS 2
> > > > > +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
> > > > > +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
> > > >
> > > > Why are those separate attributes? What is the difference
> > > > between those?
> > >
> > > Really thanks for the comments, I have some content mistaken in
> > > V2, so I updated the V3 to fix that. For the header they are
> > > same. for other content please review the V3, sorry about that.
> > > And will fix the concern you raised in next version.
> > >
> > > So the meaning of AMDGPU_SVM_ATTR_ACCESS and
> > > AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not,
> > > and the SVM can set the preferred location, it can be in VRAM or
> > > system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM
> > > and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
> > > it can not migrate, GPU only can access it in the initial place.
> >
> > Yeah but that doesn't then the interface doesn't seem to make sense
> > since such states are mutual exclusive.
> >
> > It would make sense when you have some attribute which is named
> > (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values
> > INACCESSIBLE, IN_PLACE, MIGRATE.
> >
> > > > > +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
> > > > > +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
> > > >
> > > > Why is that separated into set and clear flags?
> > >
> > > This method inherits from KFD and is also designed to be
> > > compatible with upper layer applications such as ROCR.
> >
> > That is *not* sufficient as justification. We need to document why
> > that is necessary and *not* just say ROCR works that way.
> >
> > As far as I can see just a SET_FLAGS should be sufficient.
> >
> > > > > +#define AMDGPU_SVM_ATTR_GRANULARITY 7
> > > > > +
> > > > > +#define AMDGPU_SVM_LOCATION_SYSMEM 0
> > > > > +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
> > > >
> > > > No location for device local memory?
> > >
> > > Vaule > 0 means for device memory, in xe_svm, it seems like it
> > > uses fd for device local memory.
>
> I have no stake in AMD’s uAPI, but I can at least explain how Xe’s
> uAPI
> works here—and admittedly, it’s somewhat goofy.
>
> 0 == device-local memory, with first-touch placement on whichever
> device/tile touches the memory first
>
> -1 == system memory
>
> ≥ 0 == a render-node FD (which could refer to a local or remote
> device),
> paired with a region instance to extract the pgmap for the desired
> placement
>
The first UAPI implementation here actually took a "pagemap fd",
somewhat analogous to a dma-buf fd, that was created using a
separate ioctl on the pagemap device.
IIRC when the RFC was posted, Christian had some concerns with that.
And also when presented to the UMD team they, as Matt writes, preferred
the current approach for simplicity.
The benefit of a pagemap fd is that it would be easily possible to
share other driver's pagemaps, and that the UMD would have explicit
control over the pagemap lifetime. With the current approach a pagemap
may be created during a gpu_madvise() call resulting in an unexpected
(to the app) rather long delay.
That said, it would be easy to support "pagemap fds" as well by just
checking the file type in the gpu_madvise() ioctl.
/Thomas
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 16:06 ` Matthew Brost
2026-04-20 16:28 ` Thomas Hellström
@ 2026-04-20 18:07 ` Christian König
2026-04-21 5:08 ` Matthew Brost
2026-04-21 9:52 ` Huang, Honglei1
2 siblings, 1 reply; 37+ messages in thread
From: Christian König @ 2026-04-20 18:07 UTC (permalink / raw)
To: Matthew Brost
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On 4/20/26 18:06, Matthew Brost wrote:
> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>
>>>>
>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>
>>>>> Add amdgpu drm SVM API definitions built on the
>>>>> DRM GPUSVM framework.
>>>>>
>>>>> This includes:
>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>
>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>> ---
>>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>>> 1 file changed, 39 insertions(+)
>>>>>
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>> index 406a42be4..bed71ed9b 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>> /**
>>>>> * DOC: memory domains
>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>> __u64 matrix[12];
>>>>> };
>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>> +
>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>> +
>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>
>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>
>>>> Why are those separate attributes? What is the difference between those?
>>>
>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>
>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>> it can not migrate, GPU only can access it in the initial place.
>>
>> Yeah, but then the interface doesn't seem to make sense, since such states are mutually exclusive.
>>
>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>
>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>
>>>> Why is that separated into set and clear flags?
>>>
>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>
>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>
>> As far as I can see just a SET_FLAGS should be sufficient.
>>
>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>> +
>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>
>>>> No location for device local memory?
>>>
>>> Value > 0 means device memory; in xe_svm, it seems like it uses an fd for device local memory.
>
> I have no stake in AMD’s uAPI, but I can at least explain how Xe’s uAPI
> works here—and admittedly, it’s somewhat goofy.
>
> 0 == device-local memory, with first-touch placement on whichever
> device/tile touches the memory first
>
> -1 == system memory
>
> ≥ 0 == a render-node FD (which could refer to a local or remote device),
> paired with a region instance to extract the pgmap for the desired
> placement
>
> I believe the reason this isn’t fully FD-based is that the compute UMD
> team wasn’t keen on exporting every pgmap as an FD, though that was
> something that had been considered.
That absolutely doesn't make sense to me at all.
>
>>
>> Absolute clear NAK for that approach. This interface is per FD!
>>
>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>
>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>
> We landed on the conclusion that it is undefined behavior if different
> render FDs—or more specifically, VMs across devices within the same SVM
> address space—set different madvise attributes. I believe this was at
> Sima's suggestion.
>
> From the UMD point of view, every madvise call therefore becomes:
>
> for_each_fd_vm
> set_madvise_attributes
>
> This choice was made to keep madvise attributes local to the per-device
> VM structure, rather than introducing some form of cross-device shared
> storage.
>
> A misbehaving user can absolutely shoot themselves in the foot, but at
> worst this only ends up corrupting behavior within their own process
> shared across devices.
Yeah, that makes total sense.
As far as I can see, the two interfaces contradict each other.
Either you set the information per-device — and then each device only gets the information whether it needs to migrate the page to its own local memory — or you have global information.
So why does a device fd needs to know about remote pgmap?
Thanks,
Christian.
>
> Matt
>
>>
>> Regards,
>> Christian.
>>
>>>
>>>>
>>>>> +
>>>>> +struct drm_amdgpu_svm_attribute {
>>>>> + __u32 type;
>>>>> + __u32 value;
>>>>> +};
>>>>> +
>>>>> +struct drm_amdgpu_gem_svm {
>>>>> + __u64 start_addr;
>>>>> + __u64 size;
>>>>> + __u32 operation;
>>>>> + __u32 nattr;
>>>>> + __u64 attrs_ptr;
>>>>> +};
>>>>
>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>
>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>
>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +
>>>>> #if defined(__cplusplus)
>>>>> }
>>>>> #endif
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 18:07 ` Christian König
@ 2026-04-21 5:08 ` Matthew Brost
2026-04-21 6:19 ` Christian König
0 siblings, 1 reply; 37+ messages in thread
From: Matthew Brost @ 2026-04-21 5:08 UTC (permalink / raw)
To: Christian König
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On Mon, Apr 20, 2026 at 08:07:38PM +0200, Christian König wrote:
> On 4/20/26 18:06, Matthew Brost wrote:
> > On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
> >> On 4/20/26 15:30, Huang, Honglei1 wrote:
> >>> On 4/20/2026 8:15 PM, Christian König wrote:
> >>>>
> >>>>
> >>>> On 4/20/26 14:07, Honglei Huang wrote:
> >>>>> From: Honglei Huang <honghuan@amd.com>
> >>>>>
> >>>>> Add amdgpu drm SVM API definitions built on the
> >>>>> DRM GPUSVM framework.
> >>>>>
> >>>>> This includes:
> >>>>> - DRM_AMDGPU_GEM_SVM ioctl
> >>>>> - AMDGPU_SVM_FLAG_* flags
> >>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
> >>>>> - AMDGPU_SVM_ATTR_* attribute types
> >>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
> >>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
> >>>>>
> >>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
> >>>>> ---
> >>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
> >>>>> 1 file changed, 39 insertions(+)
> >>>>>
> >>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >>>>> index 406a42be4..bed71ed9b 100644
> >>>>> --- a/include/uapi/drm/amdgpu_drm.h
> >>>>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>>>> @@ -58,6 +58,7 @@ extern "C" {
> >>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> >>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
> >>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> >>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
> >>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>>>> @@ -79,6 +80,7 @@ extern "C" {
> >>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> >>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> >>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> >>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
> >>>>> /**
> >>>>> * DOC: memory domains
> >>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
> >>>>> __u64 matrix[12];
> >>>>> };
> >>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
> >>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
> >>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
> >>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
> >>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
> >>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
> >>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
> >>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
> >>>>> +
> >>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
> >>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
> >>>>> +
> >>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
> >>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
> >>>>
> >>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
> >>>>
> >>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
> >>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
> >>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
> >>>>
> >>>> Why are those separate attributes? What is the difference between those?
> >>>
> >>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
> >>>
> >>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
> >>> it can not migrate, GPU only can access it in the initial place.
> >>
> >> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
> >>
> >> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
> >>
> >>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
> >>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
> >>>>
> >>>> Why is that separated into set and clear flags?
> >>>
> >>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
> >>
> >> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
> >>
> >> As far as I can see just a SET_FLAGS should be sufficient.
> >>
> >>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
> >>>>> +
> >>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
> >>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
> >>>>
> >>>> No location for device local memory?
> >>>
> >>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
> >
> > I have no stake in AMD’s uAPI, but I can at least explain how Xe’s uAPI
> > works here—and admittedly, it’s somewhat goofy.
> >
> > 0 == device-local memory, with first-touch placement on whichever
> > device/tile touches the memory first
> >
> > -1 == system memory
> >
> > ≥ 0 == a render-node FD (which could refer to a local or remote device),
> > paired with a region instance to extract the pgmap for the desired
> > placement
> >
> > I believe the reason this isn’t fully FD-based is that the compute UMD
> > team wasn’t keen on exporting every pgmap as an FD, though that was
> > something that had been considered.
>
> That absolutely doesn't make sense to me at all.
>
> >
> >>
> >> Absolute clear NAK for that approach. This interface is per FD!
> >>
> >> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
> >>
> >> We also need to make sure that setting attributes for different devices doesn't affect each other.
> >
> > We landed on the conclusion that it is undefined behavior if different
> > render FDs—or more specifically, VMs across devices within the same SVM
> > address space—set different madvise attributes. I believe this was at
> > Sima's suggestion.
> >
> > From the UMD point of view, every madvise call therefore becomes:
> >
> > for_each_fd_vm
> > set_madvise_attributes
> >
> > This choice was made to keep madvise attributes local to the per-device
> > VM structure, rather than introducing some form of cross-device shared
> > storage.
> >
> > A misbehaving user can absolutely shoot themselves in the foot, but at
> > worst this only ends up corrupting behavior within their own process
> > shared across devices.
>
> Yeah that makes totally sense.
>
> As far as I can see the two interfaces contradict each other.
>
No.
> Either you set the information per-device and then each device only gets the information if it needs to migrate the page to it's own local memory or you have global information.
>
> So why does a device fd needs to know about remote pgmap?
>
Simplest example:
Devices A and B. The user sets the preferred placement to Device A.
Device B faults first, and Device B moves memory to Device A via remote
pull and can access locally via P2P, scale-up, etc. Avoid a double
bounce once Device A faults.
Matt
> Thanks,
> Christian.
>
> >
> > Matt
> >
> >>
> >> Regards,
> >> Christian.
> >>
> >>>
> >>>>
> >>>>> +
> >>>>> +struct drm_amdgpu_svm_attribute {
> >>>>> + __u32 type;
> >>>>> + __u32 value;
> >>>>> +};
> >>>>> +
> >>>>> +struct drm_amdgpu_gem_svm {
> >>>>> + __u64 start_addr;
> >>>>> + __u64 size;
> >>>>> + __u32 operation;
> >>>>> + __u32 nattr;
> >>>>> + __u64 attrs_ptr;
> >>>>> +};
> >>>>
> >>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
> >>>>
> >>>> And we usually use unions in this header to separate the input from the output parameters.
> >>>
> >>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
> >>>
> >>> Regards,
> >>> Honglei
> >>>
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>> +
> >>>>> #if defined(__cplusplus)
> >>>>> }
> >>>>> #endif
> >>>>
> >>>
> >>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-21 5:08 ` Matthew Brost
@ 2026-04-21 6:19 ` Christian König
2026-04-21 6:48 ` Matthew Brost
0 siblings, 1 reply; 37+ messages in thread
From: Christian König @ 2026-04-21 6:19 UTC (permalink / raw)
To: Matthew Brost
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On 4/21/26 07:08, Matthew Brost wrote:
> On Mon, Apr 20, 2026 at 08:07:38PM +0200, Christian König wrote:
>> On 4/20/26 18:06, Matthew Brost wrote:
>>> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
...
>> Either you set the information per-device and then each device only gets the information if it needs to migrate the page to it's own local memory or you have global information.
>>
>> So why does a device fd needs to know about remote pgmap?
>>
>
> Simplest example:
>
> Devices A and B. The user sets the preferred placement to Device A.
> Device B faults first, and Device B moves memory to Device A via remote
> pull and can access locally via P2P, scale-up, etc. Avoid a double
> bounce once Device A faults.
So you basically tell the device: as soon as you want to access this VA, please push the underlying memory away to a different device?
I didn't think that would be a valid use case. As far as I can see, the only thing you avoid is updating your page tables on the faulting device twice.
Thanks for the explanation, that was absolutely not obvious. Going to discuss that with the team on the next meeting.
Christian.
>
> Matt
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-21 6:19 ` Christian König
@ 2026-04-21 6:48 ` Matthew Brost
2026-04-21 7:13 ` Christian König
0 siblings, 1 reply; 37+ messages in thread
From: Matthew Brost @ 2026-04-21 6:48 UTC (permalink / raw)
To: Christian König
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On Tue, Apr 21, 2026 at 08:19:54AM +0200, Christian König wrote:
> On 4/21/26 07:08, Matthew Brost wrote:
> > On Mon, Apr 20, 2026 at 08:07:38PM +0200, Christian König wrote:
> >> On 4/20/26 18:06, Matthew Brost wrote:
> >>> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
> >>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
> >>>>> On 4/20/2026 8:15 PM, Christian König wrote:
> ...
> >> Either you set the information per-device and then each device only gets the information if it needs to migrate the page to it's own local memory or you have global information.
> >>
> >> So why does a device fd needs to know about remote pgmap?
> >>
> >
> > Simplest example:
> >
> > Devices A and B. The user sets the preferred placement to Device A.
> > Device B faults first, and Device B moves memory to Device A via remote
> > pull and can access locally via P2P, scale-up, etc. Avoid a double
> > bounce once Device A faults.
>
> So you basically tell the device as soon as you want to access this VA please push the underlying memory away to a different device?
>
Yes. GPU SVM supports remote pulls.
> I didn't thought that would be a valid use case. As far as I can see the only thing you avoid is updating your page tables on the faulting device twice.
>
TBH, I’m unsure if this will happen in practice, but it also seems
entirely possible. Computers are asynchronous, have random thread
stalls, etc., so out-of-order from expected access isn’t far-fetched.
No — 'updating your page tables twice' is not the only bad thing.
In my example, let’s say Device B moves memory to itself initially. Then
Device A faults and tries to move the memory to itself.
Bad things:
- Two copies (one from sys->Device B, then Device B -> A)
- Device B faults again after move to Device A - this stalls B's
execution pipeline
Matt
> Thanks for the explanation, that was absolutely not obvious. Going to discuss that with the team on the next meeting.
>
> Christian.
>
> >
> > Matt
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-21 6:48 ` Matthew Brost
@ 2026-04-21 7:13 ` Christian König
0 siblings, 0 replies; 37+ messages in thread
From: Christian König @ 2026-04-21 7:13 UTC (permalink / raw)
To: Matthew Brost
Cc: Huang, Honglei1, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On 4/21/26 08:48, Matthew Brost wrote:
> On Tue, Apr 21, 2026 at 08:19:54AM +0200, Christian König wrote:
>> On 4/21/26 07:08, Matthew Brost wrote:
>>> On Mon, Apr 20, 2026 at 08:07:38PM +0200, Christian König wrote:
>>>> On 4/20/26 18:06, Matthew Brost wrote:
>>>>> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
>>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>> ...
>>>> Either you set the information per-device and then each device only gets the information if it needs to migrate the page to it's own local memory or you have global information.
>>>>
>>>> So why does a device fd needs to know about remote pgmap?
>>>>
>>>
>>> Simplest example:
>>>
>>> Devices A and B. The user sets the preferred placement to Device A.
>>> Device B faults first, and Device B moves memory to Device A via remote
>>> pull and can access locally via P2P, scale-up, etc. Avoid a double
>>> bounce once Device A faults.
>>
>> So you basically tell the device as soon as you want to access this VA please push the underlying memory away to a different device?
>>
>
> Yes. GPU SVM supports remote pulls.
>
>> I didn't thought that would be a valid use case. As far as I can see the only thing you avoid is updating your page tables on the faulting device twice.
>>
>
> TBH, I’m unsure if this will happen in practice, but it also seems
> entirely possible. Computers are asynchronous, have random thread
> stalls, etc., so out-of-order from expected access isn’t far-fetched.
>
> No on 'updating your page tables twice' being only bad thing.
>
> In my example, let’s say Device B moves memory to itself initially. Then
> Device A faults and tries to move the memory to itself.
>
> Bad things:
> - Two copies (one from sys->Device B, then Device B -> A)
> - Device B faults again after move to Device A - this stalls B's
> execution pipeline
Yeah, I mean you can always shoot yourself in the foot if you give different devices contradictory information about what to do on access.
But I think for this case you would tell device A to leave the buffer where it is and device B to migrate it locally on access.
So the worst thing which could happen is that device A accesses first and updates its page tables; then device B accesses, the page tables of A get invalidated, and the page is migrated to device B. So at maximum you have one extra page table validation/invalidation.
When you tell both device A and B to migrate to local device memory the page will of course start to play ping/pong between the two devices on access.
The feature doesn't sound that useful, but on the other hand I guess drm_svm has already solved the pgmap lifetime issues resulting from that. So why not?
Thanks,
Christian.
>
> Matt
>
>> Thanks for the explanation, that was absolutely not obvious. Going to discuss that with the team on the next meeting.
>>
>> Christian.
>>
>>>
>>> Matt
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 16:06 ` Matthew Brost
2026-04-20 16:28 ` Thomas Hellström
2026-04-20 18:07 ` Christian König
@ 2026-04-21 9:52 ` Huang, Honglei1
2 siblings, 0 replies; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-21 9:52 UTC (permalink / raw)
To: Matthew Brost
Cc: Christian König, Alexander.Deucher, Felix.Kuehling, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On 4/21/2026 12:06 AM, Matthew Brost wrote:
> On Mon, Apr 20, 2026 at 05:37:43PM +0200, Christian König wrote:
>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>
>>>>
>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>
>>>>> Add amdgpu drm SVM API definitions built on the
>>>>> DRM GPUSVM framework.
>>>>>
>>>>> This includes:
>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>
>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>> ---
>>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>>> 1 file changed, 39 insertions(+)
>>>>>
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>> index 406a42be4..bed71ed9b 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>> /**
>>>>> * DOC: memory domains
>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>> __u64 matrix[12];
>>>>> };
>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>> +
>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>> +
>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>
>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>
>>>> Why are those separate attributes? What is the difference between those?
>>>
>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>
>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>> it can not migrate, GPU only can access it in the initial place.
>>
>> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>>
>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>
>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>
>>>> Why is that separated into set and clear flags?
>>>
>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>
>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>
>> As far as I can see just a SET_FLAGS should be sufficient.
>>
>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>> +
>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>
>>>> No location for device local memory?
>>>
>>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
>
> I have no stake in AMD’s uAPI, but I can at least explain how Xe’s uAPI
> works here—and admittedly, it’s somewhat goofy.
>
> 0 == device-local memory, with first-touch placement on whichever
> device/tile touches the memory first
>
> -1 == system memory
>
> ≥ 0 == a render-node FD (which could refer to a local or remote device),
> paired with a region instance to extract the pgmap for the desired
> placement
>
> I believe the reason this isn’t fully FD-based is that the compute UMD
> team wasn’t keen on exporting every pgmap as an FD, though that was
> something that had been considered.
>
Thanks for explaining Xe's SVM interface. I have a quick question:
0 means device-local memory with first-touch placement, and a
render-node FD means explicit placement on a specific device.
What is the difference between 0 and an FD that points to the render node itself?
The reason I ask is that for amdgpu, the current design for drm gpu svm
is per FD per SVM per GPU, so device local means the current GPU. And
the behavior for 0, sounds like a global semantics/multi GPU semantics.
This can easily cause conflicts. Is there a plan to add a global
coordination feature in the drm gpu SVM instead of pushing this
responsibility to UMD?
Regards,
Honglei
>>
>> Absolute clear NAK for that approach. This interface is per FD!
>>
>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>
>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>
> We landed on the conclusion that it is undefined behavior if different
> render FDs—or more specifically, VMs across devices within the same SVM
> address space—set different madvise attributes. I believe this was at
> Sima's suggestion.
>
> From the UMD point of view, every madvise call therefore becomes:
>
> for_each_fd_vm
> set_madvise_attributes
>
> This choice was made to keep madvise attributes local to the per-device
> VM structure, rather than introducing some form of cross-device shared
> storage.
>
> A misbehaving user can absolutely shoot themselves in the foot, but at
> worst this only ends up corrupting behavior within their own process
> shared across devices.
>
> Matt
>
>>
>> Regards,
>> Christian.
>>
>>>
>>>>
>>>>> +
>>>>> +struct drm_amdgpu_svm_attribute {
>>>>> + __u32 type;
>>>>> + __u32 value;
>>>>> +};
>>>>> +
>>>>> +struct drm_amdgpu_gem_svm {
>>>>> + __u64 start_addr;
>>>>> + __u64 size;
>>>>> + __u32 operation;
>>>>> + __u32 nattr;
>>>>> + __u64 attrs_ptr;
>>>>> +};
>>>>
>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>
>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>
>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +
>>>>> #if defined(__cplusplus)
>>>>> }
>>>>> #endif
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 15:37 ` Christian König
2026-04-20 16:06 ` Matthew Brost
@ 2026-04-23 6:21 ` Huang, Honglei1
2026-04-23 10:39 ` Christian König
1 sibling, 1 reply; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-23 6:21 UTC (permalink / raw)
To: Christian König, Felix.Kuehling
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/20/2026 11:37 PM, Christian König wrote:
> On 4/20/26 15:30, Huang, Honglei1 wrote:
>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>
>>>
>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>> From: Honglei Huang <honghuan@amd.com>
>>>>
>>>> Add amdgpu drm SVM API definitions built on the
>>>> DRM GPUSVM framework.
>>>>
>>>> This includes:
>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>> - AMDGPU_SVM_FLAG_* flags
>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>
>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>> ---
>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>> 1 file changed, 39 insertions(+)
>>>>
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>> index 406a42be4..bed71ed9b 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>> /**
>>>> * DOC: memory domains
>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>> __u64 matrix[12];
>>>> };
>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>> +
>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>> +
>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>
>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>
>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>
>>> Why are those separate attributes? What is the difference between those?
>>
>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>
>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>> it can not migrate, GPU only can access it in the initial place.
>
> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>
> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
Got it so can I change the UAPI to the following format?
enum amdgpu_ioctl_svm_attr_type {
AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
AMDGPU_IOCTL_SVM_ATTR_ACCESS,
AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
};
enum amdgpu_ioctl_svm_location {
AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
AMDGPU_SVM_ACCESS_IN_PLACE = 2,
AMDGPU_SVM_ACCESS_MIGRATE = 3,
};
>
>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>
>>> Why is that separated into set and clear flags?
>>
>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>
> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>
> As far as I can see just a SET_FLAGS should be sufficient.
According to the reply from Felix, CLR_FLAGS provides a convenient method
for deleting large-scale flags; do we need to redesign this part?
>
>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>> +
>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>
>>> No location for device local memory?
>>
>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
>
> Absolute clear NAK for that approach. This interface is per FD!
>
> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>
> We also need to make sure that setting attributes for different devices doesn't affect each other.
I totally agree with your thoughts, but according to the reply from
Matt, it seems like we need to consider the P2P/multi-GPU situation.
So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to
modify the UAPI to align with xe_svm?
Regards,
Honglei
>
> Regards,
> Christian.
>
>>
>>>
>>>> +
>>>> +struct drm_amdgpu_svm_attribute {
>>>> + __u32 type;
>>>> + __u32 value;
>>>> +};
>>>> +
>>>> +struct drm_amdgpu_gem_svm {
>>>> + __u64 start_addr;
>>>> + __u64 size;
>>>> + __u32 operation;
>>>> + __u32 nattr;
>>>> + __u64 attrs_ptr;
>>>> +};
>>>
>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>
>>> And we usually use unions in this header to separate the input from the output parameters.
>>
>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>
>> Regards,
>> Honglei
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>> +
>>>> #if defined(__cplusplus)
>>>> }
>>>> #endif
>>>
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-23 6:21 ` Huang, Honglei1
@ 2026-04-23 10:39 ` Christian König
2026-04-23 11:06 ` Huang, Honglei1
0 siblings, 1 reply; 37+ messages in thread
From: Christian König @ 2026-04-23 10:39 UTC (permalink / raw)
To: Huang, Honglei1, Felix.Kuehling
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/23/26 08:21, Huang, Honglei1 wrote:
>
>
> On 4/20/2026 11:37 PM, Christian König wrote:
>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>
>>>>
>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>
>>>>> Add amdgpu drm SVM API definitions built on the
>>>>> DRM GPUSVM framework.
>>>>>
>>>>> This includes:
>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>
>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>> ---
>>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>>> 1 file changed, 39 insertions(+)
>>>>>
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>> index 406a42be4..bed71ed9b 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>> /**
>>>>> * DOC: memory domains
>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>> __u64 matrix[12];
>>>>> };
>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>> +
>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>> +
>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>
>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>
>>>> Why are those separate attributes? What is the difference between those?
>>>
>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>
>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>> it can not migrate, GPU only can access it in the initial place.
>>
>> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>>
>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>
> Got it so can I change the UAPI to the following format?
>
> enum amdgpu_ioctl_svm_attr_type {
> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
> };
>
> enum amdgpu_ioctl_svm_location {
The enum name could probably be improved, but apart from that looks reasonable to me.
> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
> AMDGPU_SVM_ACCESS_MIGRATE = 3,
> };
>
>>
>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>
>>>> Why is that separated into set and clear flags?
>>>
>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>
>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>
>> As far as I can see just a SET_FLAGS should be sufficient.
>
> Accoding to the reply form Felix, CLR_FLAGS provides a convenient method for deleting large-scale flags, do we need to redesign this part?
I think we should expose those flags as individual attributes then.
>>
>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>> +
>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>
>>>> No location for device local memory?
>>>
>>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
>>
>> Absolute clear NAK for that approach. This interface is per FD!
>>
>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>
>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>
>
> I Totally agreed with your thoughts, but according to the reply from matt, it seems like we need to consider the P2P/multi GPU situation.
When the drm_svm or pagemap component already has code to deal with that, then it is probably ok to have the same interface.
When XE only hacked that together on their own, then that is a bit questionable because getting the lifetime right is usually tricky.
>
> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
I think we need to fully clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
Regards,
Christian.
>
> Regards,
> Honglei
>
>>
>> Regards,
>> Christian.
>>
>>>
>>>>
>>>>> +
>>>>> +struct drm_amdgpu_svm_attribute {
>>>>> + __u32 type;
>>>>> + __u32 value;
>>>>> +};
>>>>> +
>>>>> +struct drm_amdgpu_gem_svm {
>>>>> + __u64 start_addr;
>>>>> + __u64 size;
>>>>> + __u32 operation;
>>>>> + __u32 nattr;
>>>>> + __u64 attrs_ptr;
>>>>> +};
>>>>
>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>
>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>
>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +
>>>>> #if defined(__cplusplus)
>>>>> }
>>>>> #endif
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-23 10:39 ` Christian König
@ 2026-04-23 11:06 ` Huang, Honglei1
2026-04-23 20:02 ` Matthew Brost
2026-04-24 10:12 ` Huang, Honglei1
0 siblings, 2 replies; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-23 11:06 UTC (permalink / raw)
To: Christian König, Felix.Kuehling
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/23/2026 6:39 PM, Christian König wrote:
> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>
>>
>> On 4/20/2026 11:37 PM, Christian König wrote:
>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>
>>>>>
>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>
>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>> DRM GPUSVM framework.
>>>>>>
>>>>>> This includes:
>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>
>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>> ---
>>>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>>>> 1 file changed, 39 insertions(+)
>>>>>>
>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>> /**
>>>>>> * DOC: memory domains
>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>> __u64 matrix[12];
>>>>>> };
>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>> +
>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>> +
>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>
>>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>>
>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>
>>>>> Why are those separate attributes? What is the difference between those?
>>>>
>>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>>
>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>> it can not migrate, GPU only can access it in the initial place.
>>>
>>> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>>>
>>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>
>> Got it so can I change the UAPI to the following format?
>>
>> enum amdgpu_ioctl_svm_attr_type {
>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>> };
>>
>> enum amdgpu_ioctl_svm_location {
>
> The enum name could probably be improved, but apart from that looks reasonable to me.
Will improve the name.
>
>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>> };
>>
>>>
>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>
>>>>> Why is that separated into set and clear flags?
>>>>
>>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>>
>>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>>
>>> As far as I can see just a SET_FLAGS should be sufficient.
>>
>> Accoding to the reply form Felix, CLR_FLAGS provides a convenient method for deleting large-scale flags, do we need to redesign this part?
>
> I think we should expose those flags as individual attributes then.
Got it will do.
>
>>>
>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>> +
>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>
>>>>> No location for device local memory?
>>>>
>>>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
>>>
>>> Absolute clear NAK for that approach. This interface is per FD!
>>>
>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>>
>>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>>
>>
>> I Totally agreed with your thoughts, but according to the reply from matt, it seems like we need to consider the P2P/multi GPU situation.
>
> When the drm_svm or pagemap component has already code to deal with that then it is probably ok to have the same interface.
>
> When when XE only hacked that together on their own then that is a bit questionable because getting the lifetime right is usually tricky.
>
>>
>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
>
> I think we need to full clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
Got it; this part may require more discussion and time to fully
understand, so it will remain unchanged in the next version.
Regards,
Honglei
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Honglei
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>>>
>>>>>> +
>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>> + __u32 type;
>>>>>> + __u32 value;
>>>>>> +};
>>>>>> +
>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>> + __u64 start_addr;
>>>>>> + __u64 size;
>>>>>> + __u32 operation;
>>>>>> + __u32 nattr;
>>>>>> + __u64 attrs_ptr;
>>>>>> +};
>>>>>
>>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>>
>>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>>
>>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>>
>>>> Regards,
>>>> Honglei
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> +
>>>>>> #if defined(__cplusplus)
>>>>>> }
>>>>>> #endif
>>>>>
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-23 11:06 ` Huang, Honglei1
@ 2026-04-23 20:02 ` Matthew Brost
2026-04-24 10:20 ` Huang, Honglei1
2026-04-24 10:12 ` Huang, Honglei1
1 sibling, 1 reply; 37+ messages in thread
From: Matthew Brost @ 2026-04-23 20:02 UTC (permalink / raw)
To: Huang, Honglei1
Cc: Christian König, Felix.Kuehling, Alexander.Deucher, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, rodrigo.vivi, thomas.hellstrom, dakr,
aliceryhl, amd-gfx, dri-devel
On Thu, Apr 23, 2026 at 07:06:43PM +0800, Huang, Honglei1 wrote:
>
>
> On 4/23/2026 6:39 PM, Christian König wrote:
> > On 4/23/26 08:21, Huang, Honglei1 wrote:
> > >
> > >
> > > On 4/20/2026 11:37 PM, Christian König wrote:
> > > > On 4/20/26 15:30, Huang, Honglei1 wrote:
> > > > > On 4/20/2026 8:15 PM, Christian König wrote:
> > > > > >
> > > > > >
> > > > > > On 4/20/26 14:07, Honglei Huang wrote:
> > > > > > > From: Honglei Huang <honghuan@amd.com>
> > > > > > >
> > > > > > > Add amdgpu drm SVM API definitions built on the
> > > > > > > DRM GPUSVM framework.
> > > > > > >
> > > > > > > This includes:
> > > > > > > - DRM_AMDGPU_GEM_SVM ioctl
> > > > > > > - AMDGPU_SVM_FLAG_* flags
> > > > > > > - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
> > > > > > > - AMDGPU_SVM_ATTR_* attribute types
> > > > > > > - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
> > > > > > > - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
> > > > > > >
> > > > > > > Signed-off-by: Honglei Huang <honghuan@amd.com>
> > > > > > > ---
> > > > > > > include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
> > > > > > > 1 file changed, 39 insertions(+)
> > > > > > >
> > > > > > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > > > > > index 406a42be4..bed71ed9b 100644
> > > > > > > --- a/include/uapi/drm/amdgpu_drm.h
> > > > > > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > > > > > @@ -58,6 +58,7 @@ extern "C" {
> > > > > > > #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> > > > > > > #define DRM_AMDGPU_USERQ_WAIT 0x18
> > > > > > > #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> > > > > > > +#define DRM_AMDGPU_GEM_SVM 0x1a
> > > > > > > #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > > > > > > #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > > > > > > @@ -79,6 +80,7 @@ extern "C" {
> > > > > > > #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> > > > > > > #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> > > > > > > #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> > > > > > > +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
> > > > > > > /**
> > > > > > > * DOC: memory domains
> > > > > > > @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
> > > > > > > __u64 matrix[12];
> > > > > > > };
> > > > > > > +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
> > > > > > > +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
> > > > > > > +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
> > > > > > > +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
> > > > > > > +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
> > > > > > > +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
> > > > > > > +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
> > > > > > > +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
> > > > > > > +
> > > > > > > +#define AMDGPU_SVM_OP_SET_ATTR 0
> > > > > > > +#define AMDGPU_SVM_OP_GET_ATTR 1
> > > > > > > +
> > > > > > > +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
> > > > > > > +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
> > > > > >
> > > > > > Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
> > > > > >
> > > > > > > +#define AMDGPU_SVM_ATTR_ACCESS 2
> > > > > > > +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
> > > > > > > +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
> > > > > >
> > > > > > Why are those separate attributes? What is the difference between those?
> > > > >
> > > > > Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
> > > > >
> > > > > So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
> > > > > it can not migrate, GPU only can access it in the initial place.
> > > >
> > > > Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
> > > >
> > > > It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
> > >
> > > Got it so can I change the UAPI to the following format?
> > >
> > > enum amdgpu_ioctl_svm_attr_type {
> > > AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
> > > AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
> > > AMDGPU_IOCTL_SVM_ATTR_ACCESS,
> > > AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
> > > AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
> > > AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
> > > };
> > >
> > > enum amdgpu_ioctl_svm_location {
> >
> > The enum name could probably be improved, but apart from that looks reasonable to me.
>
> Will improve the name.
> >
> > > AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
> > > AMDGPU_SVM_ACCESS_IN_PLACE = 2,
> > > AMDGPU_SVM_ACCESS_MIGRATE = 3,
> > > };
> > >
> > > >
> > > > > > > +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
> > > > > > > +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
> > > > > >
> > > > > > Why is that separated into set and clear flags?
> > > > >
> > > > > This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
> > > >
> > > > That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
> > > >
> > > > As far as I can see just a SET_FLAGS should be sufficient.
> > >
> > > Accoding to the reply form Felix, CLR_FLAGS provides a convenient method for deleting large-scale flags, do we need to redesign this part?
> >
> > I think we should expose those flags as individual attributes then.
>
> Got it will do.
>
> >
> > > >
> > > > > > > +#define AMDGPU_SVM_ATTR_GRANULARITY 7
> > > > > > > +
> > > > > > > +#define AMDGPU_SVM_LOCATION_SYSMEM 0
> > > > > > > +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
> > > > > >
> > > > > > No location for device local memory?
> > > > >
> > > > > Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
> > > >
> > > > Absolute clear NAK for that approach. This interface is per FD!
> > > >
> > > > We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
> > > >
> > > > We also need to make sure that setting attributes for different devices doesn't affect each other.
> > >
> > >
> > > I Totally agreed with your thoughts, but according to the reply from matt, it seems like we need to consider the P2P/multi GPU situation.
> >
> > When the drm_svm or pagemap component has already code to deal with that then it is probably ok to have the same interface.
> >
> > When when XE only hacked that together on their own then that is a bit questionable because getting the lifetime right is usually tricky.
> >
The drm_pagemap has a refcount which prevents it from disappearing. The
refcounting is a little tricky but Thomas wrote it down here [1].
A shrinker is wired to drm_pagemap; when the refcount is zero, the
drm_pagemap can be reclaimed (i.e., free all device pages), as this is a
non-trivial amount of memory (1/64 of VRAM size).
If the drm_pagemap doesn't exist or has been reclaimed, on the next use (e.g.,
creating a VM on a device, madvise on a pagemap on a remote device, etc.)
the drm_pagemap will be created.
[1] https://elixir.bootlin.com/linux/v7.0/source/drivers/gpu/drm/xe/xe_svm.c#L32
> > >
> > > So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
> >
> > I think we need to full clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
Yes, the oddity is partly because madvise / multi-gpu landed out of
order and the pushback on exporting drm_pagemaps as FDs. Making
everything FDs is a completely reasonable uAPI IMO. You'd have to have
system memory FD though but that seems possible.
Matt
>
> Got it, for this part maybe require more discussion and time to fully
> understand, so this part will remain unchanged in the next version.
>
> Regards,
> Honglei
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Regards,
> > > Honglei
> > >
> > > >
> > > > Regards,
> > > > Christian.
> > > >
> > > > >
> > > > > >
> > > > > > > +
> > > > > > > +struct drm_amdgpu_svm_attribute {
> > > > > > > + __u32 type;
> > > > > > > + __u32 value;
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct drm_amdgpu_gem_svm {
> > > > > > > + __u64 start_addr;
> > > > > > > + __u64 size;
> > > > > > > + __u32 operation;
> > > > > > > + __u32 nattr;
> > > > > > > + __u64 attrs_ptr;
> > > > > > > +};
> > > > > >
> > > > > > Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
> > > > > >
> > > > > > And we usually use unions in this header to separate the input from the output parameters.
> > > > >
> > > > > Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
> > > > >
> > > > > Regards,
> > > > > Honglei
> > > > >
> > > > > >
> > > > > > Regards,
> > > > > > Christian.
> > > > > >
> > > > > > > +
> > > > > > > #if defined(__cplusplus)
> > > > > > > }
> > > > > > > #endif
> > > > > >
> > > > >
> > > >
> > >
> >
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-23 20:02 ` Matthew Brost
@ 2026-04-24 10:20 ` Huang, Honglei1
0 siblings, 0 replies; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-24 10:20 UTC (permalink / raw)
To: Matthew Brost
Cc: Christian König, Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu,
Philip.Yang, Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl, amd-gfx,
dri-devel
On 4/24/2026 4:02 AM, Matthew Brost wrote:
> On Thu, Apr 23, 2026 at 07:06:43PM +0800, Huang, Honglei1 wrote:
>>
>>
>> On 4/23/2026 6:39 PM, Christian König wrote:
>>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>>
>>>>
>>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>>
>>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>>> DRM GPUSVM framework.
>>>>>>>>
>>>>>>>> This includes:
>>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>>
>>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>>> ---
>>>>>>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>>> /**
>>>>>>>> * DOC: memory domains
>>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>>> __u64 matrix[12];
>>>>>>>> };
>>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>>
>>>>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>>
>>>>>>> Why are those separate attributes? What is the difference between those?
>>>>>>
>>>>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>>>>
>>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>>
>>>>> Yeah but that doesn't then the interface doesn't seem to make sense since such states are mutual exclusive.
>>>>>
>>>>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>>>
>>>> Got it so can I change the UAPI to the following format?
>>>>
>>>> enum amdgpu_ioctl_svm_attr_type {
>>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>>> };
>>>>
>>>> enum amdgpu_ioctl_svm_location {
>>>
>>> The enum name could probably be improved, but apart from that looks reasonable to me.
>>
>> Will improve the name.
>>>
>>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>>> };
>>>>
>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>>
>>>>>>> Why is that separated into set and clear flags?
>>>>>>
>>>>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>>>>
>>>>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>>>>
>>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>>
> > > > According to the reply from Felix, CLR_FLAGS provides a convenient method for deleting large-scale flags; do we need to redesign this part?
>>>
>>> I think we should expose those flags as individual attributes then.
>>
>> Got it will do.
>>
>>>
>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>>
>>>>>>> No location for device local memory?
>>>>>>
>>>>>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses fd for device local memory.
>>>>>
>>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>>
>>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>>>>
>>>>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>>>>
>>>>
>>>> I Totally agreed with your thoughts, but according to the reply from matt, it seems like we need to consider the P2P/multi GPU situation.
>>>
>>> When the drm_svm or pagemap component has already code to deal with that then it is probably ok to have the same interface.
>>>
>>> When when XE only hacked that together on their own then that is a bit questionable because getting the lifetime right is usually tricky.
>>>
>
> The drm_pagemap has a refcount which prevents it from disappearing. The
> refcounting is a little tricky, but Thomas wrote it down here [1].
>
> A shrinker is wired to the drm_pagemap: when the refcount is zero, the
> drm_pagemap can be reclaimed (i.e., free all device pages), as this is a
> non-trivial amount of memory (1/64 of VRAM size).
>
> If the drm_pagemap doesn't exist or has been reclaimed, on the next use
> (e.g., creating a VM on a device, madvise on a pagemap on a remote
> device, etc.) the drm_pagemap will be created.
>
> [1] https://elixir.bootlin.com/linux/v7.0/source/drivers/gpu/drm/xe/xe_svm.c#L32
>
>>>>
>>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
>>>
>>> I think we need to full clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
>
> Yes, the oddity is partly because madvise / multi-gpu landed out of
> order and the pushback on exporting drm_pagemaps as FDs. Making
> everything FDs is a completely reasonable uAPI IMO. You'd have to have
> system memory FD though but that seems possible.
>
Thanks Matt, this is very helpful.
I agree with your direction here. For the location part, I will run an
experiment in AMDGPU following the approach you suggested.
Once I have concrete results, I will get back to you.
Regards,
Honglei
> Matt
>
>>
>> Got it, for this part maybe require more discussion and time to fully
>> understand, so this part will remain unchanged in the next version.
>>
>> Regards,
>> Honglei
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Regards,
>>>> Honglei
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>>>
>>>>>>>> +
>>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>>> + __u32 type;
>>>>>>>> + __u32 value;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>>> + __u64 start_addr;
>>>>>>>> + __u64 size;
>>>>>>>> + __u32 operation;
>>>>>>>> + __u32 nattr;
>>>>>>>> + __u64 attrs_ptr;
>>>>>>>> +};
>>>>>>>
>>>>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>>>>
>>>>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>>>>
>>>>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>>>>
>>>>>> Regards,
>>>>>> Honglei
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> +
>>>>>>>> #if defined(__cplusplus)
>>>>>>>> }
>>>>>>>> #endif
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-23 11:06 ` Huang, Honglei1
2026-04-23 20:02 ` Matthew Brost
@ 2026-04-24 10:12 ` Huang, Honglei1
2026-04-27 21:05 ` Felix Kuehling
1 sibling, 1 reply; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-24 10:12 UTC (permalink / raw)
To: Christian König, Felix.Kuehling
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/23/2026 7:06 PM, Huang, Honglei1 wrote:
>
>
> On 4/23/2026 6:39 PM, Christian König wrote:
>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>
>>>
>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>
>>>>>>
>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>
>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>> DRM GPUSVM framework.
>>>>>>>
>>>>>>> This includes:
>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>
>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>> ---
>>>>>>> include/uapi/drm/amdgpu_drm.h | 39 ++++++++++++++++++++++++++
>>>>>>> +++++++++
>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>
>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/
>>>>>>> amdgpu_drm.h
>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
>>>>>>> drm_amdgpu_gem_create)
>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union
>>>>>>> drm_amdgpu_gem_mmap)
>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct
>>>>>>> drm_amdgpu_userq_signal)
>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct
>>>>>>> drm_amdgpu_userq_wait)
>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct
>>>>>>> drm_amdgpu_gem_list_handles)
>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE +
>>>>>>> DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>> /**
>>>>>>> * DOC: memory domains
>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>> __u64 matrix[12];
>>>>>>> };
>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>> +
>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>> +
>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>
>>>>>> Up till here the interface makes perfect sense, but then it
>>>>>> becomes a bit fuzzy.
>>>>>>
>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>
>>>>>> Why are those separate attributes? What is the difference between
>>>>>> those?
>>>>>
>>>>> Really thanks for the comments, I have some content mistaken in V2,
>>>>> so I updated the V3 to fix that. For the header they are same. for
>>>>> other content please review the V3, sorry about that. And will fix
>>>>> the concern you raised in next version.
>>>>>
>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and
>>>>> AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and
>>>>> the SVM can set the preferred location, it can be in VRAM or
>>>>> system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM
>>>>> and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>
>>>> Yeah but that doesn't then the interface doesn't seem to make sense
>>>> since such states are mutual exclusive.
>>>>
>>>> It would make sense when you have some attribute which is named (for
>>>> example) AMDGPU_SVM_ATTR_ACCESS which can have the values
>>>> INACCESSIBLE, IN_PLACE, MIGRATE.
>>>
>>> Got it so can I change the UAPI to the following format?
>>>
>>> enum amdgpu_ioctl_svm_attr_type {
>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>> };
>>>
>>> enum amdgpu_ioctl_svm_location {
>>
>> The enum name could probably be improved, but apart from that looks
>> reasonable to me.
>
> Will improve the name.
>>
>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>> };
>>>
>>>>
>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>
>>>>>> Why is that separated into set and clear flags?
>>>>>
>>>>> This method inherits from KFD and is also designed to be compatible
>>>>> with upper layer applications such as ROCR.
>>>>
>>>> That is *not* sufficient as justification. We need to document why
>>>> that is necessary and *not* just say ROCR works that way.
>>>>
>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>
>>> According to the reply from Felix, CLR_FLAGS provides a convenient
>>> method for deleting large-scale flags; do we need to redesign this part?
>>
>> I think we should expose those flags as individual attributes then.
>
> Got it will do.
>
>>
>>>>
>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>> +
>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>
>>>>>> No location for device local memory?
>>>>>
>>>>> Vaule > 0 means for device memory, in xe_svm, it seems like it uses
>>>>> fd for device local memory.
>>>>
>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>
>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the
>>>> memory should be migrated to the current device.
>>>>
>>>> We also need to make sure that setting attributes for different
>>>> devices doesn't affect each other.
>>>
>>>
>>> I Totally agreed with your thoughts, but according to the reply from
>>> matt, it seems like we need to consider the P2P/multi GPU situation.
>>
>> When the drm_svm or pagemap component has already code to deal with
>> that then it is probably ok to have the same interface.
>>
>> When when XE only hacked that together on their own then that is a bit
>> questionable because getting the lifetime right is usually tricky.
>>
>>>
>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need
>>> to modify the UAPI to align with xe_svm?
>>
>> I think we need to full clarify how XE works here. E.g. that you can
>> specify both 0 as well as give the fd to get the memory migrated to
>> the local device sounds odd.
>
> Got it, for this part maybe require more discussion and time to fully
> understand, so this part will remain unchanged in the next version.
>
Hi Christian, Felix,
Based on the v3 review, I've reworked the SVM UAPI. Please let me know
if anything still looks off before I post v4.
Changes from the v3 UAPI:
- OP / ATTR_TYPE / ACCESS / LOCATION converted to documented enums.
- The three ACCESS_* attribute types collapsed into a single
AMDGPU_SVM_ATTR_ACCESS carrying enum amdgpu_ioctl_svm_access.
- ACCESS/MIGRATE renamed to ALLOW_MIGRATE to mean migration is
permitted, not required.
- SET_FLAGS / CLR_FLAGS removed; each former flag is now its own
attribute carrying enum amdgpu_ioctl_svm_flag_value: CLR / SET
enum amdgpu_ioctl_svm_op {
AMDGPU_SVM_OP_SET_ATTR = 0,
AMDGPU_SVM_OP_GET_ATTR = 1,
};
enum amdgpu_ioctl_svm_attr_type {
AMDGPU_SVM_ATTR_PREFERRED_LOC = 0,
AMDGPU_SVM_ATTR_PREFETCH_LOC = 1,
AMDGPU_SVM_ATTR_ACCESS = 2,
AMDGPU_SVM_ATTR_GRANULARITY = 3,
AMDGPU_SVM_ATTR_HOST_ACCESS = 4,
AMDGPU_SVM_ATTR_COHERENT = 5,
AMDGPU_SVM_ATTR_EXT_COHERENT = 6,
AMDGPU_SVM_ATTR_HIVE_LOCAL = 7,
AMDGPU_SVM_ATTR_GPU_RO = 8,
AMDGPU_SVM_ATTR_GPU_EXEC = 9,
AMDGPU_SVM_ATTR_GPU_READ_MOSTLY = 10,
AMDGPU_SVM_ATTR_GPU_ALWAYS_MAPPED = 11,
};
enum amdgpu_ioctl_svm_access {
AMDGPU_SVM_ACCESS_INACCESSIBLE = 0,
AMDGPU_SVM_ACCESS_IN_PLACE = 1,
AMDGPU_SVM_ACCESS_ALLOW_MIGRATE = 2,
};
enum amdgpu_ioctl_svm_location {
AMDGPU_SVM_LOCATION_SYSMEM = 0,
AMDGPU_SVM_LOCATION_UNDEFINED = 0xffffffff,
};
enum amdgpu_ioctl_svm_flag_value {
AMDGPU_SVM_FLAG_CLR = 0,
AMDGPU_SVM_FLAG_SET = 1,
};
Regards,
Honglei
> Regards,
> Honglei
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>> + __u32 type;
>>>>>>> + __u32 value;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>> + __u64 start_addr;
>>>>>>> + __u64 size;
>>>>>>> + __u32 operation;
>>>>>>> + __u32 nattr;
>>>>>>> + __u64 attrs_ptr;
>>>>>>> +};
>>>>>>
>>>>>> Those struct make perfect sense but clearly need documentation.
>>>>>> Preferable as kerneldoc.
>>>>>>
>>>>>> And we usually use unions in this header to separate the input
>>>>>> from the output parameters.
>>>>>
>>>>> Got it will add documentation for it and will use unions in next
>>>>> version. Really thanks for the comments.
>>>>>
>>>>> Regards,
>>>>> Honglei
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> +
>>>>>>> #if defined(__cplusplus)
>>>>>>> }
>>>>>>> #endif
>>>>>>
>>>>>
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-24 10:12 ` Huang, Honglei1
@ 2026-04-27 21:05 ` Felix Kuehling
2026-04-28 2:24 ` Huang, Honglei1
2026-04-28 6:49 ` Christian König
0 siblings, 2 replies; 37+ messages in thread
From: Felix Kuehling @ 2026-04-27 21:05 UTC (permalink / raw)
To: Huang, Honglei1, Christian König
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 2026-04-24 06:12, Huang, Honglei1 wrote:
>
>
> On 4/23/2026 7:06 PM, Huang, Honglei1 wrote:
>>
>>
>> On 4/23/2026 6:39 PM, Christian König wrote:
>>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>>
>>>>
>>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>>
>>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>>> DRM GPUSVM framework.
>>>>>>>>
>>>>>>>> This includes:
>>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>>
>>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>>> ---
>>>>>>>> include/uapi/drm/amdgpu_drm.h | 39
>>>>>>>> ++++++++++++++++++++++++++ +++++++++
>>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/
>>>>>>>> amdgpu_drm.h
>>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE
>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
>>>>>>>> drm_amdgpu_gem_create)
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
>>>>>>>> + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL
>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct
>>>>>>>> drm_amdgpu_userq_signal)
>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT
>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct
>>>>>>>> drm_amdgpu_userq_wait)
>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES
>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct
>>>>>>>> drm_amdgpu_gem_list_handles)
>>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE +
>>>>>>>> DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>>> /**
>>>>>>>> * DOC: memory domains
>>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>>> __u64 matrix[12];
>>>>>>>> };
>>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>>
>>>>>>> Up till here the interface makes perfect sense, but then it
>>>>>>> becomes a bit fuzzy.
>>>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>>
>>>>>>> Why are those separate attributes? What is the difference
>>>>>>> between those?
>>>>>>
>>>>>> Really thanks for the comments, I have some content mistaken in
>>>>>> V2, so I updated the V3 to fix that. For the header they are
>>>>>> same. for other content please review the V3, sorry about that.
>>>>>> And will fix the concern you raised in next version.
>>>>>>
>>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and
>>>>>> AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not,
>>>>>> and the SVM can set the preferred location, it can be in VRAM or
>>>>>> system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM
>>>>>> and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>>
>>>>> Yeah but that doesn't then the interface doesn't seem to make
>>>>> sense since such states are mutual exclusive.
>>>>>
>>>>> It would make sense when you have some attribute which is named
>>>>> (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values
>>>>> INACCESSIBLE, IN_PLACE, MIGRATE.
>>>>
>>>> Got it so can I change the UAPI to the following format?
>>>>
>>>> enum amdgpu_ioctl_svm_attr_type {
>>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>>> };
>>>>
>>>> enum amdgpu_ioctl_svm_location {
>>>
>>> The enum name could probably be improved, but apart from that looks
>>> reasonable to me.
>>
>> Will improve the name.
>>>
>>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>>> };
>>>>
>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>>
>>>>>>> Why is that separated into set and clear flags?
>>>>>>
>>>>>> This method inherits from KFD and is also designed to be
>>>>>> compatible with upper layer applications such as ROCR.
>>>>>
>>>>> That is *not* sufficient as justification. We need to document why
>>>>> that is necessary and *not* just say ROCR works that way.
>>>>>
>>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>>
>>>> According to the reply from Felix, CLR_FLAGS provides a convenient
>>>> method for deleting large-scale flags; do we need to redesign this
>>>> part?
>>>
>>> I think we should expose those flags as individual attributes then.
>>
>> Got it will do.
>>
>>>
>>>>>
>>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>>> +
>>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>>
>>>>>>> No location for device local memory?
>>>>>>
>>>>>> Vaule > 0 means for device memory, in xe_svm, it seems like it
>>>>>> uses fd for device local memory.
>>>>>
>>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>>
>>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the
>>>>> memory should be migrated to the current device.
>>>>>
>>>>> We also need to make sure that setting attributes for different
>>>>> devices doesn't affect each other.
>>>>
>>>>
>>>> I Totally agreed with your thoughts, but according to the reply
>>>> from matt, it seems like we need to consider the P2P/multi GPU
>>>> situation.
>>>
>>> When the drm_svm or pagemap component has already code to deal with
>>> that then it is probably ok to have the same interface.
>>>
>>> When when XE only hacked that together on their own then that is a
>>> bit questionable because getting the lifetime right is usually tricky.
>>>
>>>>
>>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need
>>>> to modify the UAPI to align with xe_svm?
>>>
>>> I think we need to full clarify how XE works here. E.g. that you can
>>> specify both 0 as well as give the fd to get the memory migrated to
>>> the local device sounds odd.
>>
>> Got it, for this part maybe require more discussion and time to fully
>> understand, so this part will remain unchanged in the next version.
>>
>
> Hi Christian, Felix,
>
> Based on the v3 review, I've reworked the SVM UAPI. Please let me know
> if anything still looks off before I post v4.
>
> Changes with v3 UAPI:
> - OP / ATTR_TYPE / ACCESS / LOCATION converted to documented enums.
> - Three ACCESS_* attribute types collapsed into a single
> AMDGPU_SVM_ATTR_ACCESS carrying enum amdgpu_ioctl_svm_access.
> - ACCESS/MIGRATE renamed to ALLOW_MIGRATE to means permitted, not
> required
> - SET_FLAGS / CLR_FLAGS removed; each former flag is now its own
> attribute carrying enum amdgpu_ioctl_svm_flag_value: CLR / SET
>
>
> enum amdgpu_ioctl_svm_op {
> AMDGPU_SVM_OP_SET_ATTR = 0,
> AMDGPU_SVM_OP_GET_ATTR = 1,
> };
>
> enum amdgpu_ioctl_svm_attr_type {
> AMDGPU_SVM_ATTR_PREFERRED_LOC = 0,
> AMDGPU_SVM_ATTR_PREFETCH_LOC = 1,
> AMDGPU_SVM_ATTR_ACCESS = 2,
> AMDGPU_SVM_ATTR_GRANULARITY = 3,
> AMDGPU_SVM_ATTR_HOST_ACCESS = 4,
> AMDGPU_SVM_ATTR_COHERENT = 5,
> AMDGPU_SVM_ATTR_EXT_COHERENT = 6,
> AMDGPU_SVM_ATTR_HIVE_LOCAL = 7,
> AMDGPU_SVM_ATTR_GPU_RO = 8,
> AMDGPU_SVM_ATTR_GPU_EXEC = 9,
> AMDGPU_SVM_ATTR_GPU_READ_MOSTLY = 10,
> AMDGPU_SVM_ATTR_GPU_ALWAYS_MAPPED = 11,
> };
>
> enum amdgpu_ioctl_svm_access {
> AMDGPU_SVM_ACCESS_INACCESSIBLE = 0,
> AMDGPU_SVM_ACCESS_IN_PLACE = 1,
> AMDGPU_SVM_ACCESS_ALLOW_MIGRATE = 2,
> };
>
> enum amdgpu_ioctl_svm_location {
> AMDGPU_SVM_LOCATION_SYSMEM = 0,
> AMDGPU_SVM_LOCATION_UNDEFINED = 0xffffffff,
> };
>
> enum amdgpu_ioctl_svm_flag_value {
> AMDGPU_SVM_FLAG_CLR = 0,
> AMDGPU_SVM_FLAG_SET = 1,
> };
Looks reasonable to me. But I'm not sure you really need enum
amdgpu_ioctl_svm_flag_value. I'd just use 0 and non-zero to mean false
and true.
Regards,
Felix
>
>
> Regards,
> Honglei
>
>> Regards,
>> Honglei
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Regards,
>>>> Honglei
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>>>
>>>>>>>> +
>>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>>> + __u32 type;
>>>>>>>> + __u32 value;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>>> + __u64 start_addr;
>>>>>>>> + __u64 size;
>>>>>>>> + __u32 operation;
>>>>>>>> + __u32 nattr;
>>>>>>>> + __u64 attrs_ptr;
>>>>>>>> +};
>>>>>>>
>>>>>>> Those struct make perfect sense but clearly need documentation.
>>>>>>> Preferable as kerneldoc.
>>>>>>>
>>>>>>> And we usually use unions in this header to separate the input
>>>>>>> from the output parameters.
>>>>>>
>>>>>> Got it will add documentation for it and will use unions in next
>>>>>> version. Really thanks for the comments.
>>>>>>
>>>>>> Regards,
>>>>>> Honglei
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> +
>>>>>>>> #if defined(__cplusplus)
>>>>>>>> }
>>>>>>>> #endif
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-27 21:05 ` Felix Kuehling
@ 2026-04-28 2:24 ` Huang, Honglei1
2026-04-28 6:49 ` Christian König
1 sibling, 0 replies; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-28 2:24 UTC (permalink / raw)
To: Felix Kuehling, Christian König
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/28/2026 5:05 AM, Felix Kuehling wrote:
>
> On 2026-04-24 06:12, Huang, Honglei1 wrote:
>>
>>
>> On 4/23/2026 7:06 PM, Huang, Honglei1 wrote:
>>>
>>>
>>> On 4/23/2026 6:39 PM, Christian König wrote:
>>>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>>>
>>>>>
>>>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>>>
>>>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>>>> DRM GPUSVM framework.
>>>>>>>>>
>>>>>>>>> This includes:
>>>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>>>
>>>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>>>> ---
>>>>>>>>> include/uapi/drm/amdgpu_drm.h | 39 ++++++++++++++++++++++++
>>>>>>>>> ++ +++++++++
>>>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/
>>>>>>>>> amdgpu_drm.h
>>>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE
>>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
>>>>>>>>> drm_amdgpu_gem_create)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
>>>>>>>>> + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL
>>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct
>>>>>>>>> drm_amdgpu_userq_signal)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT
>>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct
>>>>>>>>> drm_amdgpu_userq_wait)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES
>>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct
>>>>>>>>> drm_amdgpu_gem_list_handles)
>>>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE +
>>>>>>>>> DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>>>> /**
>>>>>>>>> * DOC: memory domains
>>>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>>>> __u64 matrix[12];
>>>>>>>>> };
>>>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>>>
>>>>>>>> Up till here the interface makes perfect sense, but then it
>>>>>>>> becomes a bit fuzzy.
>>>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>>>
>>>>>>>> Why are those separate attributes? What is the difference
>>>>>>>> between those?
>>>>>>>
>>>>>>> Really thanks for the comments, I have some content mistaken in
>>>>>>> V2, so I updated the V3 to fix that. For the header they are
>>>>>>> same. for other content please review the V3, sorry about that.
>>>>>>> And will fix the concern you raised in next version.
>>>>>>>
>>>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and
>>>>>>> AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not,
>>>>>>> and the SVM can set the preferred location, it can be in VRAM or
>>>>>>> system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM
>>>>>>> and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>>>
>>>>>> Yeah, but then the interface doesn't seem to make sense, since
>>>>>> such states are mutually exclusive.
>>>>>>
>>>>>> It would make sense when you have some attribute which is named
>>>>>> (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values
>>>>>> INACCESSIBLE, IN_PLACE, MIGRATE.
>>>>>
>>>>> Got it so can I change the UAPI to the following format?
>>>>>
>>>>> enum amdgpu_ioctl_svm_attr_type {
>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>>>> };
>>>>>
>>>>> enum amdgpu_ioctl_svm_location {
>>>>
>>>> The enum name could probably be improved, but apart from that looks
>>>> reasonable to me.
>>>
>>> Will improve the name.
>>>>
>>>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>>>> };
>>>>>
>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>>>
>>>>>>>> Why is that separated into set and clear flags?
>>>>>>>
>>>>>>> This method inherits from KFD and is also designed to be
>>>>>>> compatible with upper layer applications such as ROCR.
>>>>>>
>>>>>> That is *not* sufficient as justification. We need to document why
>>>>>> that is necessary and *not* just say ROCR works that way.
>>>>>>
>>>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>>>
>>>>> According to the reply from Felix, CLR_FLAGS provides a convenient
>>>>> method for clearing a large set of flags; do we need to redesign
>>>>> this part?
>>>>
>>>> I think we should expose those flags as individual attributes then.
>>>
>>> Got it will do.
>>>
>>>>
>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>>>
>>>>>>>> No location for device local memory?
>>>>>>>
>>>>>>> A value > 0 means device memory; in xe_svm, it seems like it
>>>>>>> uses an fd for device local memory.
>>>>>>
>>>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>>>
>>>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the
>>>>>> memory should be migrated to the current device.
>>>>>>
>>>>>> We also need to make sure that setting attributes for different
>>>>>> devices doesn't affect each other.
>>>>>
>>>>>
>>>>> I totally agree with your thoughts, but according to the reply
>>>>> from Matt, it seems like we need to consider the P2P/multi-GPU
>>>>> situation.
>>>>
>>>> When the drm_svm or pagemap component has already code to deal with
>>>> that then it is probably ok to have the same interface.
>>>>
>>>> But when XE only hacked that together on their own, then that is a
>>>> bit questionable, because getting the lifetime right is usually tricky.
>>>>
>>>>>
>>>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need
>>>>> to modify the UAPI to align with xe_svm?
>>>>
>>>> I think we need to fully clarify how XE works here. E.g. that you can
>>>> specify both 0 as well as give the fd to get the memory migrated to
>>>> the local device sounds odd.
>>>
>>> Got it, for this part maybe require more discussion and time to fully
>>> understand, so this part will remain unchanged in the next version.
>>>
>>
>> Hi Christian, Felix,
>>
>> Based on the v3 review, I've reworked the SVM UAPI. Please let me know
>> if anything still looks off before I post v4.
>>
>> Changes with v3 UAPI:
>> - OP / ATTR_TYPE / ACCESS / LOCATION converted to documented enums.
>> - Three ACCESS_* attribute types collapsed into a single
>> AMDGPU_SVM_ATTR_ACCESS carrying enum amdgpu_ioctl_svm_access.
>> - ACCESS/MIGRATE renamed to ALLOW_MIGRATE to mean permitted, not
>> required
>> - SET_FLAGS / CLR_FLAGS removed; each former flag is now its own
>> attribute carrying enum amdgpu_ioctl_svm_flag_value: CLR / SET
>>
>>
>> enum amdgpu_ioctl_svm_op {
>> AMDGPU_SVM_OP_SET_ATTR = 0,
>> AMDGPU_SVM_OP_GET_ATTR = 1,
>> };
>>
>> enum amdgpu_ioctl_svm_attr_type {
>> AMDGPU_SVM_ATTR_PREFERRED_LOC = 0,
>> AMDGPU_SVM_ATTR_PREFETCH_LOC = 1,
>> AMDGPU_SVM_ATTR_ACCESS = 2,
>> AMDGPU_SVM_ATTR_GRANULARITY = 3,
>> AMDGPU_SVM_ATTR_HOST_ACCESS = 4,
>> AMDGPU_SVM_ATTR_COHERENT = 5,
>> AMDGPU_SVM_ATTR_EXT_COHERENT = 6,
>> AMDGPU_SVM_ATTR_HIVE_LOCAL = 7,
>> AMDGPU_SVM_ATTR_GPU_RO = 8,
>> AMDGPU_SVM_ATTR_GPU_EXEC = 9,
>> AMDGPU_SVM_ATTR_GPU_READ_MOSTLY = 10,
>> AMDGPU_SVM_ATTR_GPU_ALWAYS_MAPPED = 11,
>> };
>>
>> enum amdgpu_ioctl_svm_access {
>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 0,
>> AMDGPU_SVM_ACCESS_IN_PLACE = 1,
>> AMDGPU_SVM_ACCESS_ALLOW_MIGRATE = 2,
>> };
>>
>> enum amdgpu_ioctl_svm_location {
>> AMDGPU_SVM_LOCATION_SYSMEM = 0,
>> AMDGPU_SVM_LOCATION_UNDEFINED = 0xffffffff,
>> };
>>
>> enum amdgpu_ioctl_svm_flag_value {
>> AMDGPU_SVM_FLAG_CLR = 0,
>> AMDGPU_SVM_FLAG_SET = 1,
>> };
>
> Looks reasonable to me. But I'm not sure you really need enum
> amdgpu_ioctl_svm_flag_value. I'd just use 0 and non-zero to mean false
> and true.
>
Got it, will remove enum amdgpu_ioctl_svm_flag_value and use 0 and
non-zero to mean false and true for the flags.
Regards,
Honglei
> Regards,
> Felix
>
>
>>
>>
>> Regards,
>> Honglei
>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Regards,
>>>>> Honglei
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>>>> + __u32 type;
>>>>>>>>> + __u32 value;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>>>> + __u64 start_addr;
>>>>>>>>> + __u64 size;
>>>>>>>>> + __u32 operation;
>>>>>>>>> + __u32 nattr;
>>>>>>>>> + __u64 attrs_ptr;
>>>>>>>>> +};
>>>>>>>>
>>>>>>>> Those struct make perfect sense but clearly need documentation.
>>>>>>>> Preferable as kerneldoc.
>>>>>>>>
>>>>>>>> And we usually use unions in this header to separate the input
>>>>>>>> from the output parameters.
>>>>>>>
>>>>>>> Got it will add documentation for it and will use unions in next
>>>>>>> version. Really thanks for the comments.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Honglei
>>>>>>>
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> #if defined(__cplusplus)
>>>>>>>>> }
>>>>>>>>> #endif
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-27 21:05 ` Felix Kuehling
2026-04-28 2:24 ` Huang, Honglei1
@ 2026-04-28 6:49 ` Christian König
2026-04-28 7:00 ` Huang, Honglei1
1 sibling, 1 reply; 37+ messages in thread
From: Christian König @ 2026-04-28 6:49 UTC (permalink / raw)
To: Felix Kuehling, Huang, Honglei1
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/27/26 23:05, Felix Kuehling wrote:
>
> On 2026-04-24 06:12, Huang, Honglei1 wrote:
>>
>>
>> On 4/23/2026 7:06 PM, Huang, Honglei1 wrote:
>>>
>>>
>>> On 4/23/2026 6:39 PM, Christian König wrote:
>>>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>>>
>>>>>
>>>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>>>
>>>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>>>> DRM GPUSVM framework.
>>>>>>>>>
>>>>>>>>> This includes:
>>>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>>>
>>>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>>>> ---
>>>>>>>>> include/uapi/drm/amdgpu_drm.h | 39 ++++++++++++++++++++++++++ +++++++++
>>>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/ amdgpu_drm.h
>>>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>>>> /**
>>>>>>>>> * DOC: memory domains
>>>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>>>> __u64 matrix[12];
>>>>>>>>> };
>>>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>>>
>>>>>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>>>
>>>>>>>> Why are those separate attributes? What is the difference between those?
>>>>>>>
>>>>>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>>>>>
>>>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>>>
>>>>>> Yeah, but then the interface doesn't seem to make sense, since such states are mutually exclusive.
>>>>>>
>>>>>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>>>>
>>>>> Got it so can I change the UAPI to the following format?
>>>>>
>>>>> enum amdgpu_ioctl_svm_attr_type {
>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>>>> };
>>>>>
>>>>> enum amdgpu_ioctl_svm_location {
>>>>
>>>> The enum name could probably be improved, but apart from that looks reasonable to me.
>>>
>>> Will improve the name.
>>>>
>>>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>>>> };
>>>>>
>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>>>
>>>>>>>> Why is that separated into set and clear flags?
>>>>>>>
>>>>>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>>>>>
>>>>>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>>>>>
>>>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>>>
>>>>> According to the reply from Felix, CLR_FLAGS provides a convenient method for clearing a large set of flags; do we need to redesign this part?
>>>>
>>>> I think we should expose those flags as individual attributes then.
>>>
>>> Got it will do.
>>>
>>>>
>>>>>>
>>>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>>>> +
>>>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>>>
>>>>>>>> No location for device local memory?
>>>>>>>
>>>>>>> A value > 0 means device memory; in xe_svm, it seems like it uses an fd for device local memory.
>>>>>>
>>>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>>>
>>>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>>>>>
>>>>>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>>>>>
>>>>>
>>>>> I totally agree with your thoughts, but according to the reply from Matt, it seems like we need to consider the P2P/multi-GPU situation.
>>>>
>>>> When the drm_svm or pagemap component has already code to deal with that then it is probably ok to have the same interface.
>>>>
>>>> But when XE only hacked that together on their own, then that is a bit questionable, because getting the lifetime right is usually tricky.
>>>>
>>>>>
>>>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
>>>>
>>>> I think we need to fully clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
>>>
>>> Got it, for this part maybe require more discussion and time to fully understand, so this part will remain unchanged in the next version.
>>>
>>
>> Hi Christian, Felix,
>>
>> Based on the v3 review, I've reworked the SVM UAPI. Please let me know if anything still looks off before I post v4.
>>
>> Changes with v3 UAPI:
>> - OP / ATTR_TYPE / ACCESS / LOCATION converted to documented enums.
>> - Three ACCESS_* attribute types collapsed into a single
>> AMDGPU_SVM_ATTR_ACCESS carrying enum amdgpu_ioctl_svm_access.
>> - ACCESS/MIGRATE renamed to ALLOW_MIGRATE to mean permitted, not required
>> - SET_FLAGS / CLR_FLAGS removed; each former flag is now its own
>> attribute carrying enum amdgpu_ioctl_svm_flag_value: CLR / SET
>>
>>
>> enum amdgpu_ioctl_svm_op {
>> AMDGPU_SVM_OP_SET_ATTR = 0,
>> AMDGPU_SVM_OP_GET_ATTR = 1,
>> };
>>
>> enum amdgpu_ioctl_svm_attr_type {
>> AMDGPU_SVM_ATTR_PREFERRED_LOC = 0,
>> AMDGPU_SVM_ATTR_PREFETCH_LOC = 1,
>> AMDGPU_SVM_ATTR_ACCESS = 2,
>> AMDGPU_SVM_ATTR_GRANULARITY = 3,
>> AMDGPU_SVM_ATTR_HOST_ACCESS = 4,
>> AMDGPU_SVM_ATTR_COHERENT = 5,
>> AMDGPU_SVM_ATTR_EXT_COHERENT = 6,
>> AMDGPU_SVM_ATTR_HIVE_LOCAL = 7,
>> AMDGPU_SVM_ATTR_GPU_RO = 8,
>> AMDGPU_SVM_ATTR_GPU_EXEC = 9,
>> AMDGPU_SVM_ATTR_GPU_READ_MOSTLY = 10,
>> AMDGPU_SVM_ATTR_GPU_ALWAYS_MAPPED = 11,
>> };
>>
>> enum amdgpu_ioctl_svm_access {
>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 0,
>> AMDGPU_SVM_ACCESS_IN_PLACE = 1,
>> AMDGPU_SVM_ACCESS_ALLOW_MIGRATE = 2,
>> };
>>
>> enum amdgpu_ioctl_svm_location {
>> AMDGPU_SVM_LOCATION_SYSMEM = 0,
>> AMDGPU_SVM_LOCATION_UNDEFINED = 0xffffffff,
>> };
>>
>> enum amdgpu_ioctl_svm_flag_value {
>> AMDGPU_SVM_FLAG_CLR = 0,
>> AMDGPU_SVM_FLAG_SET = 1,
>> };
>
> Looks reasonable to me. But I'm not sure you really need enum amdgpu_ioctl_svm_flag_value. I'd just use 0 and non-zero to mean false and true.
Yeah, agreed. We also already have true/false from stdbool.h, or from the C standard itself.
Apart from that looks good to me as well.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>>
>>
>> Regards,
>> Honglei
>>
>>> Regards,
>>> Honglei
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Regards,
>>>>> Honglei
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>>>> + __u32 type;
>>>>>>>>> + __u32 value;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>>>> + __u64 start_addr;
>>>>>>>>> + __u64 size;
>>>>>>>>> + __u32 operation;
>>>>>>>>> + __u32 nattr;
>>>>>>>>> + __u64 attrs_ptr;
>>>>>>>>> +};
>>>>>>>>
>>>>>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>>>>>
>>>>>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>>>>>
>>>>>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Honglei
>>>>>>>
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> #if defined(__cplusplus)
>>>>>>>>> }
>>>>>>>>> #endif
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-28 6:49 ` Christian König
@ 2026-04-28 7:00 ` Huang, Honglei1
0 siblings, 0 replies; 37+ messages in thread
From: Huang, Honglei1 @ 2026-04-28 7:00 UTC (permalink / raw)
To: Christian König, Felix Kuehling
Cc: Alexander.Deucher, Oak.Zeng, Jenny-Jing.Liu, Philip.Yang,
Xiaogang.Chen, Ray.Huang, Lingshan.Zhu, Junhua.Shen,
matthew.brost, rodrigo.vivi, thomas.hellstrom, dakr, aliceryhl,
amd-gfx, dri-devel
On 4/28/2026 2:49 PM, Christian König wrote:
>
>
> On 4/27/26 23:05, Felix Kuehling wrote:
>>
>> On 2026-04-24 06:12, Huang, Honglei1 wrote:
>>>
>>>
>>> On 4/23/2026 7:06 PM, Huang, Honglei1 wrote:
>>>>
>>>>
>>>> On 4/23/2026 6:39 PM, Christian König wrote:
>>>>> On 4/23/26 08:21, Huang, Honglei1 wrote:
>>>>>>
>>>>>>
>>>>>> On 4/20/2026 11:37 PM, Christian König wrote:
>>>>>>> On 4/20/26 15:30, Huang, Honglei1 wrote:
>>>>>>>> On 4/20/2026 8:15 PM, Christian König wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 4/20/26 14:07, Honglei Huang wrote:
>>>>>>>>>> From: Honglei Huang <honghuan@amd.com>
>>>>>>>>>>
>>>>>>>>>> Add amdgpu drm SVM API definitions built on the
>>>>>>>>>> DRM GPUSVM framework.
>>>>>>>>>>
>>>>>>>>>> This includes:
>>>>>>>>>> - DRM_AMDGPU_GEM_SVM ioctl
>>>>>>>>>> - AMDGPU_SVM_FLAG_* flags
>>>>>>>>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>>>>>>>>> - AMDGPU_SVM_ATTR_* attribute types
>>>>>>>>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>>>>>>>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>>>>>>>>> ---
>>>>>>>>>> include/uapi/drm/amdgpu_drm.h | 39 ++++++++++++++++++++++++++ +++++++++
>>>>>>>>>> 1 file changed, 39 insertions(+)
>>>>>>>>>>
>>>>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/ amdgpu_drm.h
>>>>>>>>>> index 406a42be4..bed71ed9b 100644
>>>>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>>>>> @@ -58,6 +58,7 @@ extern "C" {
>>>>>>>>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>>>>>>>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>>>>>>>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>>>>>>>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>>>>> @@ -79,6 +80,7 @@ extern "C" {
>>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>>>>>>>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>>>>>>>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>>>>>>>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>>>>>>>>> /**
>>>>>>>>>> * DOC: memory domains
>>>>>>>>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>>>>>>>>> __u64 matrix[12];
>>>>>>>>>> };
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>>>>>>>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>>>>>>>>> +
>>>>>>>>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>>>>>>>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>>>>>>>>> +
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>>>>>>>>>
>>>>>>>>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>>>>>>>>
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>>>>>>>>>
>>>>>>>>> Why are those separate attributes? What is the difference between those?
>>>>>>>>
>>>>>>>> Really thanks for the comments, I have some content mistaken in V2, so I updated the V3 to fix that. For the header they are same. for other content please review the V3, sorry about that. And will fix the concern you raised in next version.
>>>>>>>>
>>>>>>>> So the meaning of AMDGPU_SVM_ATTR_ACCESS and AMDGPU_SVM_ATTR_NO_ACCESS are clear, GPU can access it or not, and the SVM can set the preferred location, it can be in VRAM or system, for AMDGPU_SVM_ATTR_ACCESS it can be migrated between RAM and VRAM. For AMDGPU_SVM_ATTR_ACCESS_IN_PLACE,
>>>>>>>> it can not migrate, GPU only can access it in the initial place.
>>>>>>>
>>>>>>> Yeah, but then the interface doesn't seem to make sense, since such states are mutually exclusive.
>>>>>>>
>>>>>>> It would make sense when you have some attribute which is named (for example) AMDGPU_SVM_ATTR_ACCESS which can have the values INACCESSIBLE, IN_PLACE, MIGRATE.
>>>>>>
>>>>>> Got it so can I change the UAPI to the following format?
>>>>>>
>>>>>> enum amdgpu_ioctl_svm_attr_type {
>>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFERRED_LOC,
>>>>>> AMDGPU_IOCTL_SVM_ATTR_PREFETCH_LOC,
>>>>>> AMDGPU_IOCTL_SVM_ATTR_ACCESS,
>>>>>> AMDGPU_IOCTL_SVM_ATTR_SET_FLAGS,
>>>>>> AMDGPU_IOCTL_SVM_ATTR_CLR_FLAGS,
>>>>>> AMDGPU_IOCTL_SVM_ATTR_GRANULARITY
>>>>>> };
>>>>>>
>>>>>> enum amdgpu_ioctl_svm_location {
>>>>>
>>>>> The enum name could probably be improved, but apart from that looks reasonable to me.
>>>>
>>>> Will improve the name.
>>>>>
>>>>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 1,
>>>>>> AMDGPU_SVM_ACCESS_IN_PLACE = 2,
>>>>>> AMDGPU_SVM_ACCESS_MIGRATE = 3,
>>>>>> };
>>>>>>
>>>>>>>
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>>>>>>>>>
>>>>>>>>> Why is that separated into set and clear flags?
>>>>>>>>
>>>>>>>> This method inherits from KFD and is also designed to be compatible with upper layer applications such as ROCR.
>>>>>>>
>>>>>>> That is *not* sufficient as justification. We need to document why that is necessary and *not* just say ROCR works that way.
>>>>>>>
>>>>>>> As far as I can see just a SET_FLAGS should be sufficient.
>>>>>>
>>>>>> According to the reply from Felix, CLR_FLAGS provides a convenient method for clearing a large set of flags; do we need to redesign this part?
>>>>>
>>>>> I think we should expose those flags as individual attributes then.
>>>>
>>>> Got it will do.
>>>>
>>>>>
>>>>>>>
>>>>>>>>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>>>>>>>>> +
>>>>>>>>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>>>>>>>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>>>>>>>>>
>>>>>>>>> No location for device local memory?
>>>>>>>>
>>>>>>>> A value > 0 means device memory; in xe_svm, it seems like it uses an fd for device local memory.
>>>>>>>
>>>>>>> Absolute clear NAK for that approach. This interface is per FD!
>>>>>>>
>>>>>>> We need some value AMDGPU_SVM_LOCATION_DEVICE which means that the memory should be migrated to the current device.
>>>>>>>
>>>>>>> We also need to make sure that setting attributes for different devices doesn't affect each other.
>>>>>>
>>>>>>
>>>>>> I totally agree with your thoughts, but according to the reply from Matt, it seems like we need to consider the P2P/multi-GPU situation.
>>>>>
>>>>> When the drm_svm or pagemap component has already code to deal with that then it is probably ok to have the same interface.
>>>>>
>>>>> But when XE only hacked that together on their own, then that is a bit questionable, because getting the lifetime right is usually tricky.
>>>>>
>>>>>>
>>>>>> So do I need to add a AMDGPU_SVM_LOCATION_DEVICE flag or do I need to modify the UAPI to align with xe_svm?
>>>>>
>>>>> I think we need to fully clarify how XE works here. E.g. that you can specify both 0 as well as give the fd to get the memory migrated to the local device sounds odd.
>>>>
>>>> Got it, for this part maybe require more discussion and time to fully understand, so this part will remain unchanged in the next version.
>>>>
>>>
>>> Hi Christian, Felix,
>>>
>>> Based on the v3 review, I've reworked the SVM UAPI. Please let me know if anything still looks off before I post v4.
>>>
>>> Changes with v3 UAPI:
>>> - OP / ATTR_TYPE / ACCESS / LOCATION converted to documented enums.
>>> - Three ACCESS_* attribute types collapsed into a single
>>> AMDGPU_SVM_ATTR_ACCESS carrying enum amdgpu_ioctl_svm_access.
>>> - ACCESS/MIGRATE renamed to ALLOW_MIGRATE to mean permitted, not required
>>> - SET_FLAGS / CLR_FLAGS removed; each former flag is now its own
>>> attribute carrying enum amdgpu_ioctl_svm_flag_value: CLR / SET
>>>
>>>
>>> enum amdgpu_ioctl_svm_op {
>>> AMDGPU_SVM_OP_SET_ATTR = 0,
>>> AMDGPU_SVM_OP_GET_ATTR = 1,
>>> };
>>>
>>> enum amdgpu_ioctl_svm_attr_type {
>>> AMDGPU_SVM_ATTR_PREFERRED_LOC = 0,
>>> AMDGPU_SVM_ATTR_PREFETCH_LOC = 1,
>>> AMDGPU_SVM_ATTR_ACCESS = 2,
>>> AMDGPU_SVM_ATTR_GRANULARITY = 3,
>>> AMDGPU_SVM_ATTR_HOST_ACCESS = 4,
>>> AMDGPU_SVM_ATTR_COHERENT = 5,
>>> AMDGPU_SVM_ATTR_EXT_COHERENT = 6,
>>> AMDGPU_SVM_ATTR_HIVE_LOCAL = 7,
>>> AMDGPU_SVM_ATTR_GPU_RO = 8,
>>> AMDGPU_SVM_ATTR_GPU_EXEC = 9,
>>> AMDGPU_SVM_ATTR_GPU_READ_MOSTLY = 10,
>>> AMDGPU_SVM_ATTR_GPU_ALWAYS_MAPPED = 11,
>>> };
>>>
>>> enum amdgpu_ioctl_svm_access {
>>> AMDGPU_SVM_ACCESS_INACCESSIBLE = 0,
>>> AMDGPU_SVM_ACCESS_IN_PLACE = 1,
>>> AMDGPU_SVM_ACCESS_ALLOW_MIGRATE = 2,
>>> };
>>>
>>> enum amdgpu_ioctl_svm_location {
>>> AMDGPU_SVM_LOCATION_SYSMEM = 0,
>>> AMDGPU_SVM_LOCATION_UNDEFINED = 0xffffffff,
>>> };
>>>
>>> enum amdgpu_ioctl_svm_flag_value {
>>> AMDGPU_SVM_FLAG_CLR = 0,
>>> AMDGPU_SVM_FLAG_SET = 1,
>>> };
>>
>> Looks reasonable to me. But I'm not sure you really need enum amdgpu_ioctl_svm_flag_value. I'd just use 0 and non-zero to mean false and true.
>
> Yeah agree. We also should have true/false in stdbool.h or by the C standard itself.
>
> Apart from that looks good to me as well.
Got it, will remove amdgpu_ioctl_svm_flag_value according to the request.
Regards,
Honglei
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Felix
>>
>>
>>>
>>>
>>> Regards,
>>> Honglei
>>>
>>>> Regards,
>>>> Honglei
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Honglei
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> +struct drm_amdgpu_svm_attribute {
>>>>>>>>>> + __u32 type;
>>>>>>>>>> + __u32 value;
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +struct drm_amdgpu_gem_svm {
>>>>>>>>>> + __u64 start_addr;
>>>>>>>>>> + __u64 size;
>>>>>>>>>> + __u32 operation;
>>>>>>>>>> + __u32 nattr;
>>>>>>>>>> + __u64 attrs_ptr;
>>>>>>>>>> +};
>>>>>>>>>
>>>>>>>>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>>>>>>>>
>>>>>>>>> And we usually use unions in this header to separate the input from the output parameters.
>>>>>>>>
>>>>>>>> Got it will add documentation for it and will use unions in next version. Really thanks for the comments.
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Honglei
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> #if defined(__cplusplus)
>>>>>>>>>> }
>>>>>>>>>> #endif
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-20 12:15 ` Christian König
2026-04-20 13:30 ` Huang, Honglei1
@ 2026-04-21 3:37 ` Kuehling, Felix
2026-04-21 6:39 ` Christian König
1 sibling, 1 reply; 37+ messages in thread
From: Kuehling, Felix @ 2026-04-21 3:37 UTC (permalink / raw)
To: Christian König, Honglei Huang, Alexander.Deucher, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
On 2026-04-20 08:15, Christian König wrote:
>
> On 4/20/26 14:07, Honglei Huang wrote:
>> From: Honglei Huang <honghuan@amd.com>
>>
>> Add amdgpu drm SVM API definitions built on the
>> DRM GPUSVM framework.
>>
>> This includes:
>> - DRM_AMDGPU_GEM_SVM ioctl
>> - AMDGPU_SVM_FLAG_* flags
>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>> - AMDGPU_SVM_ATTR_* attribute types
>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>
>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>> ---
>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>> 1 file changed, 39 insertions(+)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 406a42be4..bed71ed9b 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -58,6 +58,7 @@ extern "C" {
>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>
>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>> @@ -79,6 +80,7 @@ extern "C" {
>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>
>> /**
>> * DOC: memory domains
>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>> __u64 matrix[12];
>> };
>>
>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>> +
>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>> +
>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>
>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
> Why are those separate attributes? What is the difference between those?
>
>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
> Why is that separated into set and clear flags?
This looks like it's based on the KFD SVM API. We created this so we
could set or clear specific flags for address ranges without knowing what
other flags were set or not set on different parts of those address
ranges already.
E.g. you may have set an RO flag for pages 1-5, and set a COHERENT flag
for pages 3-7. Now you want to clear EXEC for pages 0-8. If you specify
an exact flags parameter, you wipe out all those other settings that
have different values for different pages in the range. Instead this API
lets you say "clear the EXEC flag on pages 0-8 without touching any of
the other flags".
Alternatively we could have made all those flags completely separate
boolean attributes. Making them flags that can be set/cleared in this
way is more economical for how they are stored and manipulated.
Regards,
Felix
>
>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>> +
>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
> No location for device local memory?
>
>> +
>> +struct drm_amdgpu_svm_attribute {
>> + __u32 type;
>> + __u32 value;
>> +};
>> +
>> +struct drm_amdgpu_gem_svm {
>> + __u64 start_addr;
>> + __u64 size;
>> + __u32 operation;
>> + __u32 nattr;
>> + __u64 attrs_ptr;
>> +};
> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>
> And we usually use unions in this header to separate the input from the output parameters.
>
> Regards,
> Christian.
>
>> +
>> #if defined(__cplusplus)
>> }
>> #endif
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions
2026-04-21 3:37 ` Kuehling, Felix
@ 2026-04-21 6:39 ` Christian König
0 siblings, 0 replies; 37+ messages in thread
From: Christian König @ 2026-04-21 6:39 UTC (permalink / raw)
To: Kuehling, Felix, Honglei Huang, Alexander.Deucher, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
On 4/21/26 05:37, Kuehling, Felix wrote:
>
> On 2026-04-20 08:15, Christian König wrote:
>>
>> On 4/20/26 14:07, Honglei Huang wrote:
>>> From: Honglei Huang <honghuan@amd.com>
>>>
>>> Add amdgpu drm SVM API definitions built on the
>>> DRM GPUSVM framework.
>>>
>>> This includes:
>>> - DRM_AMDGPU_GEM_SVM ioctl
>>> - AMDGPU_SVM_FLAG_* flags
>>> - AMDGPU_SVM_OP_SET_ATTR / AMDGPU_SVM_OP_GET_ATTR operations
>>> - AMDGPU_SVM_ATTR_* attribute types
>>> - AMDGPU_SVM_LOCATION_SYSMEM / AMDGPU_SVM_LOCATION_UNDEFINED
>>> - struct drm_amdgpu_svm_attribute and struct drm_amdgpu_gem_svm
>>>
>>> Signed-off-by: Honglei Huang <honghuan@amd.com>
>>> ---
>>> include/uapi/drm/amdgpu_drm.h | 39 +++++++++++++++++++++++++++++++++++
>>> 1 file changed, 39 insertions(+)
>>>
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 406a42be4..bed71ed9b 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -58,6 +58,7 @@ extern "C" {
>>> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
>>> #define DRM_AMDGPU_USERQ_WAIT 0x18
>>> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
>>> +#define DRM_AMDGPU_GEM_SVM 0x1a
>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>> @@ -79,6 +80,7 @@ extern "C" {
>>> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>>> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>>> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>>> +#define DRM_IOCTL_AMDGPU_GEM_SVM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_SVM, struct drm_amdgpu_gem_svm)
>>> /**
>>> * DOC: memory domains
>>> @@ -1665,6 +1667,43 @@ struct drm_color_ctm_3x4 {
>>> __u64 matrix[12];
>>> };
>>> +#define AMDGPU_SVM_FLAG_HOST_ACCESS 0x00000001
>>> +#define AMDGPU_SVM_FLAG_COHERENT 0x00000002
>>> +#define AMDGPU_SVM_FLAG_HIVE_LOCAL 0x00000004
>>> +#define AMDGPU_SVM_FLAG_GPU_RO 0x00000008
>>> +#define AMDGPU_SVM_FLAG_GPU_EXEC 0x00000010
>>> +#define AMDGPU_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
>>> +#define AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
>>> +#define AMDGPU_SVM_FLAG_EXT_COHERENT 0x00000080
>>> +
>>> +#define AMDGPU_SVM_OP_SET_ATTR 0
>>> +#define AMDGPU_SVM_OP_GET_ATTR 1
>>> +
>>> +#define AMDGPU_SVM_ATTR_PREFERRED_LOC 0
>>> +#define AMDGPU_SVM_ATTR_PREFETCH_LOC 1
>> Up till here the interface makes perfect sense, but then it becomes a bit fuzzy.
>>
>>> +#define AMDGPU_SVM_ATTR_ACCESS 2
>>> +#define AMDGPU_SVM_ATTR_ACCESS_IN_PLACE 3
>>> +#define AMDGPU_SVM_ATTR_NO_ACCESS 4
>> Why are those separate attributes? What is the difference between those?
>>
>>> +#define AMDGPU_SVM_ATTR_SET_FLAGS 5
>>> +#define AMDGPU_SVM_ATTR_CLR_FLAGS 6
>> Why is that separated into set and clear flags?
>
> This looks like it's based on the KFD SVM API. We created this so we could set or clear specific flags for address ranges without knowing what other flags were set or not set on different parts of those address ranges already.
>
> E.g. you may have set an RO flag for pages 1-5, and set a COHERENT flag for pages 3-7. Now you want to clear EXEC for pages 0-8. If you specify an exact flags parameter, you wipe out all those other settings that have different values for different pages in the range. Instead this API lets you say "clear the EXEC flag on pages 0-8 without touching any of the other flags".
>
> Alternatively we could have made all those flags completely separate boolean attributes. Making them flags that can be set/cleared in this way is more economical for how they are stored and manipulated.
Yeah that's exactly what I thought as solution as well.
I mean the kernel can store them internally as flags, but we should clearly have a clean and coherent uAPI for them.
Regards,
Christian.
>
> Regards,
> Felix
>
>
>>
>>> +#define AMDGPU_SVM_ATTR_GRANULARITY 7
>>> +
>>> +#define AMDGPU_SVM_LOCATION_SYSMEM 0
>>> +#define AMDGPU_SVM_LOCATION_UNDEFINED 0xffffffff
>> No location for device local memory?
>>
>>> +
>>> +struct drm_amdgpu_svm_attribute {
>>> + __u32 type;
>>> + __u32 value;
>>> +};
>>> +
>>> +struct drm_amdgpu_gem_svm {
>>> + __u64 start_addr;
>>> + __u64 size;
>>> + __u32 operation;
>>> + __u32 nattr;
>>> + __u64 attrs_ptr;
>>> +};
>> Those struct make perfect sense but clearly need documentation. Preferable as kerneldoc.
>>
>> And we usually use unions in this header to separate the input from the output parameters.
>>
>> Regards,
>> Christian.
>>
>>> +
>>> #if defined(__cplusplus)
>>> }
>>> #endif
^ permalink raw reply [flat|nested] 37+ messages in thread
* [RFC/POC PATCH 02/12] drm/amdgpu: add SVM data structures and header
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 03/12] drm/amdgpu: add SVM attribute data structures Honglei Huang
` (9 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
This includes:
- struct amdgpu_svm contains drm_gpusvm, refcount,
attr_tree, workqueues, locks, atomics, and per-mode callbacks
- Helper macros and functions
- Function declarations with CONFIG_DRM_AMDGPU_SVM guards and inline
stubs
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h | 147 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +
2 files changed, 151 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
new file mode 100644
index 000000000..a1bfe8b47
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SVM_H__
+#define __AMDGPU_SVM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gpusvm.h>
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/printk.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct amdgpu_device;
+struct amdgpu_vm;
+struct amdgpu_svm_attr_tree;
+struct drm_device;
+struct drm_file;
+
+#define AMDGPU_SVM_TRACE(fmt, ...) \
+ pr_debug("%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define AMDGPU_SVM_KMEM_CACHE_CREATE(name, type) \
+ kmem_cache_create((name), sizeof(type), 0, 0, NULL)
+
+#define AMDGPU_SVM_KMEM_CACHE_DESTROY(cache) \
+ do { \
+ if ((cache) != NULL) { \
+ kmem_cache_destroy((cache)); \
+ (cache) = NULL; \
+ } \
+ } while (0)
+
+struct amdgpu_svm {
+ struct drm_gpusvm gpusvm;
+ struct kref refcount;
+ struct amdgpu_device *adev;
+ struct amdgpu_vm *vm;
+ struct amdgpu_svm_attr_tree *attr_tree;
+ struct workqueue_struct *gc_wq;
+ struct workqueue_struct *restore_wq;
+ struct rw_semaphore svm_lock;
+ spinlock_t gc_lock;
+ struct list_head gc_list;
+ struct work_struct gc_work;
+ struct list_head restore_work_list;
+ struct delayed_work restore_work;
+ atomic_t kfd_queues_quiesced;
+ atomic_t evicted_ranges;
+ atomic_t exiting;
+ u8 default_granularity;
+ bool xnack_enabled;
+ void (*begin_restore)(struct amdgpu_svm *svm);
+ void (*end_restore)(struct amdgpu_svm *svm);
+ void (*flush_tlb)(struct amdgpu_svm *svm);
+};
+
+static inline struct amdgpu_svm *to_amdgpu_svm(struct drm_gpusvm *gpusvm)
+{
+ return container_of(gpusvm, struct amdgpu_svm, gpusvm);
+}
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_SVM)
+int amdgpu_svm_cache_init(void);
+void amdgpu_svm_cache_fini(void);
+
+int amdgpu_svm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_svm_close(struct amdgpu_vm *vm);
+void amdgpu_svm_fini(struct amdgpu_vm *vm);
+
+int amdgpu_svm_handle_fault(struct amdgpu_device *adev, uint32_t pasid,
+ uint64_t fault_addr, bool write_fault);
+bool amdgpu_svm_is_enabled(struct amdgpu_vm *vm);
+
+int amdgpu_gem_svm_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+#else
+static inline int amdgpu_svm_init(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ return 0;
+}
+
+static inline int amdgpu_svm_cache_init(void)
+{
+ return 0;
+}
+
+static inline void amdgpu_svm_cache_fini(void)
+{
+}
+
+static inline void amdgpu_svm_close(struct amdgpu_vm *vm)
+{
+}
+
+static inline void amdgpu_svm_fini(struct amdgpu_vm *vm)
+{
+}
+
+static inline int amdgpu_svm_handle_fault(struct amdgpu_device *adev,
+ uint32_t pasid,
+ uint64_t fault_addr,
+ bool write_fault)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool amdgpu_svm_is_enabled(struct amdgpu_vm *vm)
+{
+ return false;
+}
+
+static inline int amdgpu_gem_svm_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_DRM_AMDGPU_SVM */
+
+#endif /* __AMDGPU_SVM_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cf0ec94e8..7a5aeefdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -43,6 +43,7 @@ struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
struct amdgpu_bo_vm;
+struct amdgpu_svm;
/*
* GPUVM handling
@@ -445,6 +446,9 @@ struct amdgpu_vm {
/* cached fault info */
struct amdgpu_vm_fault_info fault_info;
+
+ /* SVM experimental implementation */
+ struct amdgpu_svm *svm;
};
struct amdgpu_vm_manager {
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 03/12] drm/amdgpu: add SVM attribute data structures
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 01/12] drm/amdgpu: add SVM UAPI definitions Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 02/12] drm/amdgpu: add SVM data structures and header Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 04/12] drm/amdgpu: implement SVM attribute tree operations Honglei Huang
` (8 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Add the SVM attribute subsystem header defining:
- enum amdgpu_svm_attr_access
- flag masks for change
- struct amdgpu_svm_attrs, separate from the drm svm range
- struct amdgpu_svm_attr_range: interval-tree node
- struct amdgpu_svm_attr_tree
- enum amdgpu_svm_attr_change_trigger
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.h | 110 +++++++++++++++++++
1 file changed, 110 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.h
new file mode 100644
index 000000000..d49f6bb72
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SVM_ATTR_H__
+#define __AMDGPU_SVM_ATTR_H__
+
+#include <drm/amdgpu_drm.h>
+#include <linux/interval_tree.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+
+/* one fd one svm one GPU so no bit map
+ * only three status for this pattren.
+ */
+enum amdgpu_svm_attr_access {
+ AMDGPU_SVM_ACCESS_NONE = 0,
+ AMDGPU_SVM_ACCESS_ENABLE = 1,
+ AMDGPU_SVM_ACCESS_IN_PLACE = 2,
+};
+
+#define AMDGPU_SVM_PTE_FLAG_MASK \
+ (AMDGPU_SVM_FLAG_COHERENT | AMDGPU_SVM_FLAG_EXT_COHERENT | \
+ AMDGPU_SVM_FLAG_GPU_RO | AMDGPU_SVM_FLAG_GPU_EXEC)
+
+#define AMDGPU_SVM_MAPPING_FLAG_MASK \
+ (AMDGPU_SVM_FLAG_HOST_ACCESS | AMDGPU_SVM_FLAG_HIVE_LOCAL | \
+ AMDGPU_SVM_FLAG_GPU_READ_MOSTLY | AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED)
+
+struct amdgpu_svm_attrs {
+ /* keep preferred_loc to adapt to kfd API */
+ int32_t preferred_loc;
+ int32_t prefetch_loc;
+ uint32_t flags;
+ uint32_t granularity;
+ enum amdgpu_svm_attr_access access;
+};
+
+struct amdgpu_svm_attr_range {
+ struct interval_tree_node it_node;
+ struct list_head list;
+ struct amdgpu_svm_attrs attrs;
+};
+
+struct amdgpu_svm;
+
+struct amdgpu_svm_attr_tree {
+ struct mutex lock;
+ struct rb_root_cached tree;
+ struct list_head range_list;
+ struct amdgpu_svm *svm;
+};
+
+enum amdgpu_svm_attr_change_trigger {
+ AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE = (1U << 0),
+ AMDGPU_SVM_ATTR_TRIGGER_PTE_FLAG_CHANGE = (1U << 1),
+ AMDGPU_SVM_ATTR_TRIGGER_MAPPING_FLAG_CHANGE = (1U << 2),
+ AMDGPU_SVM_ATTR_TRIGGER_LOCATION_CHANGE = (1U << 3),
+ AMDGPU_SVM_ATTR_TRIGGER_GRANULARITY_CHANGE = (1U << 4),
+ AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY = (1U << 5), /* no changes */
+};
+
+struct amdgpu_svm_attr_tree *
+amdgpu_svm_attr_tree_create(struct amdgpu_svm *svm);
+void amdgpu_svm_attr_tree_destroy(struct amdgpu_svm_attr_tree *attr_tree);
+int amdgpu_svm_attr_cache_init(void);
+void amdgpu_svm_attr_cache_fini(void);
+void amdgpu_svm_attr_lookup_page_locked(struct amdgpu_svm_attr_tree *attr_tree,
+ unsigned long page,
+ struct amdgpu_svm_attrs *attrs,
+ unsigned long *seg_last);
+
+int amdgpu_svm_attr_set(struct amdgpu_svm_attr_tree *attr_tree,
+ uint64_t start,
+ uint64_t size,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs);
+int amdgpu_svm_attr_get(struct amdgpu_svm_attr_tree *attr_tree,
+ uint64_t start,
+ uint64_t size,
+ uint32_t nattr,
+ struct drm_amdgpu_svm_attribute *attrs);
+int amdgpu_svm_attr_clear_pages(struct amdgpu_svm_attr_tree *attr_tree,
+ unsigned long start_page,
+ unsigned long last_page);
+
+#endif /* __AMDGPU_SVM_ATTR_H__ */
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 04/12] drm/amdgpu: implement SVM attribute tree operations
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (2 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 03/12] drm/amdgpu: add SVM attribute data structures Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 05/12] drm/amdgpu: implement SVM attribute set Honglei Huang
` (7 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Implement the attribute tree operations.
- Attribute tree operations
- amdgpu_svm_attr_tree_create/destroy for lifecycle management
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c | 346 +++++++++++++++++++
1 file changed, 346 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
new file mode 100644
index 000000000..137dfcb58
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_svm.h"
+#include "amdgpu_svm_attr.h"
+#include "amdgpu_svm_range.h"
+
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/lockdep.h>
+#include <linux/minmax.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+static struct kmem_cache *amdgpu_svm_attr_range_cache;
+
+struct attr_get_ctx {
+ int32_t preferred_loc;
+ int32_t prefetch_loc;
+ enum amdgpu_svm_attr_access access;
+ uint32_t granularity;
+ uint32_t flags_and;
+ uint32_t flags_or;
+ bool has_range;
+};
+
+int amdgpu_svm_attr_cache_init(void)
+{
+ amdgpu_svm_attr_range_cache = AMDGPU_SVM_KMEM_CACHE_CREATE(
+ "amdgpu_svm_attr_range_cache", struct amdgpu_svm_attr_range);
+ if (!amdgpu_svm_attr_range_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void amdgpu_svm_attr_cache_fini(void)
+{
+ AMDGPU_SVM_KMEM_CACHE_DESTROY(amdgpu_svm_attr_range_cache);
+}
+
+static void attr_set_interval(struct amdgpu_svm_attr_range *range,
+ unsigned long start_page,
+ unsigned long last_page)
+{
+ range->it_node.start = start_page;
+ range->it_node.last = last_page;
+}
+
+static unsigned long attr_start_page(const struct amdgpu_svm_attr_range *range)
+{
+ return range->it_node.start;
+}
+
+static unsigned long attr_last_page(const struct amdgpu_svm_attr_range *range)
+{
+ return range->it_node.last;
+}
+
+static void attr_set_default(struct amdgpu_svm *svm,
+ struct amdgpu_svm_attrs *attrs)
+{
+ attrs->preferred_loc = AMDGPU_SVM_LOCATION_UNDEFINED;
+ attrs->prefetch_loc = AMDGPU_SVM_LOCATION_UNDEFINED;
+ attrs->granularity = svm->default_granularity;
+ attrs->flags = AMDGPU_SVM_FLAG_HOST_ACCESS | AMDGPU_SVM_FLAG_COHERENT;
+ attrs->access = svm->xnack_enabled ?
+ AMDGPU_SVM_ACCESS_ENABLE : AMDGPU_SVM_ACCESS_NONE;
+}
+
+void amdgpu_svm_attr_lookup_page_locked(struct amdgpu_svm_attr_tree *attr_tree,
+ unsigned long page,
+ struct amdgpu_svm_attrs *attrs,
+ unsigned long *range_last)
+{
+ struct interval_tree_node *node;
+ struct amdgpu_svm_attr_range *range;
+
+ node = interval_tree_iter_first(&attr_tree->tree, page, page);
+ if (node) {
+ range = container_of(node, struct amdgpu_svm_attr_range, it_node);
+ *attrs = range->attrs;
+ *range_last = range->it_node.last;
+ return;
+ }
+
+ attr_set_default(attr_tree->svm, attrs);
+ *range_last = ULONG_MAX;
+
+ if (page == ULONG_MAX)
+ return;
+
+ node = interval_tree_iter_first(&attr_tree->tree, page + 1, ULONG_MAX);
+ if (!node)
+ return;
+
+ range = container_of(node, struct amdgpu_svm_attr_range, it_node);
+ if (range->it_node.start > page)
+ *range_last = range->it_node.start - 1;
+}
+
+static bool amdgpu_svm_attr_equal(const struct amdgpu_svm_attrs *a,
+ const struct amdgpu_svm_attrs *b)
+{
+ return a->flags == b->flags &&
+ a->preferred_loc == b->preferred_loc &&
+ a->prefetch_loc == b->prefetch_loc &&
+ a->granularity == b->granularity &&
+ a->access == b->access;
+}
+
+static struct amdgpu_svm_attr_range *
+attr_alloc_range(unsigned long start,
+ unsigned long last,
+ const struct amdgpu_svm_attrs *attrs)
+{
+ struct amdgpu_svm_attr_range *range;
+
+ range = kmem_cache_zalloc(amdgpu_svm_attr_range_cache, GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ INIT_LIST_HEAD(&range->list);
+ attr_set_interval(range, start, last);
+ range->attrs = *attrs;
+ return range;
+}
+
+static void attr_insert_range_locked(struct amdgpu_svm_attr_tree *attr_tree,
+ struct amdgpu_svm_attr_range *range)
+{
+ struct interval_tree_node *node;
+ struct amdgpu_svm_attr_range *next;
+
+ lockdep_assert_held(&attr_tree->lock);
+
+ node = interval_tree_iter_first(&attr_tree->tree, attr_start_page(range),
+ ULONG_MAX);
+ if (node) {
+ next = container_of(node, struct amdgpu_svm_attr_range, it_node);
+ list_add_tail(&range->list, &next->list);
+ } else {
+ list_add_tail(&range->list, &attr_tree->range_list);
+ }
+
+ interval_tree_insert(&range->it_node, &attr_tree->tree);
+}
+
+static void attr_remove_range_locked(struct amdgpu_svm_attr_tree *attr_tree,
+ struct amdgpu_svm_attr_range *range,
+ bool free_range)
+{
+ lockdep_assert_held(&attr_tree->lock);
+
+ interval_tree_remove(&range->it_node, &attr_tree->tree);
+ list_del_init(&range->list);
+ if (free_range)
+ kmem_cache_free(amdgpu_svm_attr_range_cache, range);
+}
+
+struct amdgpu_svm_attr_tree *
+amdgpu_svm_attr_tree_create(struct amdgpu_svm *svm)
+{
+ struct amdgpu_svm_attr_tree *attr_tree;
+
+ attr_tree = kzalloc(sizeof(*attr_tree), GFP_KERNEL);
+ if (!attr_tree)
+ return NULL;
+
+ mutex_init(&attr_tree->lock);
+ attr_tree->tree = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&attr_tree->range_list);
+ attr_tree->svm = svm;
+ return attr_tree;
+}
+
+void amdgpu_svm_attr_tree_destroy(struct amdgpu_svm_attr_tree *attr_tree)
+{
+ struct amdgpu_svm_attr_range *range, *tmp;
+
+ if (!attr_tree)
+ return;
+
+ mutex_lock(&attr_tree->lock);
+ list_for_each_entry_safe(range, tmp, &attr_tree->range_list, list) {
+ interval_tree_remove(&range->it_node, &attr_tree->tree);
+ list_del_init(&range->list);
+ kmem_cache_free(amdgpu_svm_attr_range_cache, range);
+ }
+ mutex_unlock(&attr_tree->lock);
+
+ mutex_destroy(&attr_tree->lock);
+ kfree(attr_tree);
+}
+
+static void attr_get_ctx_add(struct attr_get_ctx *ctx,
+ const struct amdgpu_svm_attrs *attrs)
+{
+ if (!ctx->has_range) {
+ ctx->preferred_loc = attrs->preferred_loc;
+ ctx->prefetch_loc = attrs->prefetch_loc;
+ ctx->granularity = attrs->granularity;
+ ctx->access = attrs->access;
+ ctx->flags_and = attrs->flags;
+ ctx->flags_or = attrs->flags;
+ ctx->has_range = true;
+ return;
+ }
+
+ if (ctx->preferred_loc != attrs->preferred_loc)
+ ctx->preferred_loc = AMDGPU_SVM_LOCATION_UNDEFINED;
+ if (ctx->prefetch_loc != attrs->prefetch_loc)
+ ctx->prefetch_loc = AMDGPU_SVM_LOCATION_UNDEFINED;
+ if (attrs->granularity < ctx->granularity)
+ ctx->granularity = attrs->granularity;
+ if (ctx->access != attrs->access)
+ ctx->access = AMDGPU_SVM_ACCESS_NONE;
+ ctx->flags_and &= attrs->flags;
+ ctx->flags_or |= attrs->flags;
+}
+
+static int attr_get_ctx_to_result(const struct attr_get_ctx *ctx,
+ uint32_t nattr,
+ struct drm_amdgpu_svm_attribute *attrs)
+{
+ uint32_t i;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case AMDGPU_SVM_ATTR_PREFERRED_LOC:
+ attrs[i].value = ctx->preferred_loc;
+ break;
+ case AMDGPU_SVM_ATTR_PREFETCH_LOC:
+ attrs[i].value = ctx->prefetch_loc;
+ break;
+ case AMDGPU_SVM_ATTR_ACCESS:
+ if (ctx->access == AMDGPU_SVM_ACCESS_ENABLE)
+ attrs[i].type = AMDGPU_SVM_ATTR_ACCESS;
+ else if (ctx->access == AMDGPU_SVM_ACCESS_IN_PLACE)
+ attrs[i].type = AMDGPU_SVM_ATTR_ACCESS_IN_PLACE;
+ else
+ attrs[i].type = AMDGPU_SVM_ATTR_NO_ACCESS;
+ break;
+ case AMDGPU_SVM_ATTR_SET_FLAGS:
+ attrs[i].value = ctx->flags_and;
+ break;
+ case AMDGPU_SVM_ATTR_CLR_FLAGS:
+ attrs[i].value = ~ctx->flags_or;
+ break;
+ case AMDGPU_SVM_ATTR_GRANULARITY:
+ attrs[i].value = ctx->granularity;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_svm_attr_get(struct amdgpu_svm_attr_tree *attr_tree,
+ uint64_t start, uint64_t size,
+ uint32_t nattr,
+ struct drm_amdgpu_svm_attribute *attrs)
+{
+ struct amdgpu_svm_attrs default_attrs;
+ struct attr_get_ctx ctx = { 0 };
+ struct interval_tree_node *node;
+ unsigned long start_page, last_page, cursor;
+ int r;
+
+ start_page = start >> PAGE_SHIFT;
+ last_page = (start + size - 1) >> PAGE_SHIFT;
+
+ mutex_lock(&attr_tree->lock);
+ attr_set_default(attr_tree->svm, &default_attrs);
+ node = interval_tree_iter_first(&attr_tree->tree, start_page, last_page);
+
+ cursor = start_page;
+ while (cursor <= last_page) {
+ const struct amdgpu_svm_attrs *range_attrs;
+ unsigned long range_last = last_page;
+ struct amdgpu_svm_attr_range *range = NULL;
+ unsigned long next;
+
+ if (node) {
+ range = container_of(node, struct amdgpu_svm_attr_range,
+ it_node);
+
+ if (attr_last_page(range) < cursor) {
+ node = interval_tree_iter_next(node, start_page,
+ last_page);
+ continue;
+ }
+
+ if (attr_start_page(range) <= cursor) {
+ range_last = min(last_page, attr_last_page(range));
+ node = interval_tree_iter_next(node, start_page,
+ last_page);
+ } else {
+ range_last = min(last_page,
+ attr_start_page(range) - 1);
+ range = NULL;
+ }
+ }
+
+ range_attrs = range ? &range->attrs : &default_attrs;
+ attr_get_ctx_add(&ctx, range_attrs);
+
+ if (range_last == ULONG_MAX)
+ break;
+
+ next = range_last + 1;
+ if (next <= cursor)
+ break;
+ cursor = next;
+ }
+
+ if (!ctx.has_range)
+ attr_get_ctx_add(&ctx, &default_attrs);
+
+ r = attr_get_ctx_to_result(&ctx, nattr, attrs);
+ mutex_unlock(&attr_tree->lock);
+ return r;
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 05/12] drm/amdgpu: implement SVM attribute set
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (3 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 04/12] drm/amdgpu: implement SVM attribute tree operations Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 06/12] drm/amdgpu: add SVM range data structures Honglei Huang
` (6 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Implement the attribute set path
- Attribute application: apply UAPI attributes to internal attrs
- Attribute tree range set, split and remove operations.
- amdgpu_svm_attr_set with retry on -EAGAIN
- amdgpu_svm_attr_clear_pages: remove attribute ranges for unmapped
operations.
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c | 548 +++++++++++++++++++
1 file changed, 548 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
index 137dfcb58..cd972026f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
@@ -33,8 +33,23 @@
#include <linux/mm.h>
#include <linux/slab.h>
+/* Union of every flag bit user space may pass via SET_FLAGS/CLR_FLAGS. */
+#define AMDGPU_SVM_VALID_FLAG_MASK \
+ (AMDGPU_SVM_FLAG_HOST_ACCESS | AMDGPU_SVM_FLAG_COHERENT | \
+ AMDGPU_SVM_FLAG_HIVE_LOCAL | AMDGPU_SVM_FLAG_GPU_RO | \
+ AMDGPU_SVM_FLAG_GPU_EXEC | AMDGPU_SVM_FLAG_GPU_READ_MOSTLY | \
+ AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED | AMDGPU_SVM_FLAG_EXT_COHERENT)
+
+
static struct kmem_cache *amdgpu_svm_attr_range_cache;
+/*
+ * attr_set_ctx - one attribute change to replay against the range layer:
+ * the affected page interval, the trigger bits computed from the diff,
+ * and the before/after attribute snapshots.
+ */
+struct attr_set_ctx {
+ unsigned long start;
+ unsigned long last;
+ uint32_t trigger;
+ struct amdgpu_svm_attrs prev_attrs;
+ struct amdgpu_svm_attrs new_attrs;
+};
+
struct attr_get_ctx {
int32_t preferred_loc;
int32_t prefetch_loc;
@@ -130,6 +145,48 @@ static bool amdgpu_svm_attr_equal(const struct amdgpu_svm_attrs *a,
a->access == b->access;
}
+/*
+ * attr_change_ctx_trigger - derive trigger bits from an attribute diff.
+ *
+ * Compares @prev_attrs and @new_attrs field by field and sets one trigger
+ * bit per category that changed (access, PTE flags, mapping flags,
+ * preferred/prefetch location, granularity).  When nothing relevant
+ * changed, AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY is returned so callers can
+ * tell "attr-only update" apart from "no trigger computed yet" (0).
+ */
+static uint32_t
+attr_change_ctx_trigger(const struct amdgpu_svm_attrs *prev_attrs,
+ const struct amdgpu_svm_attrs *new_attrs)
+{
+ uint32_t trigger = 0;
+ uint32_t changed_flags = prev_attrs->flags ^ new_attrs->flags;
+
+ if (prev_attrs->access != new_attrs->access)
+ trigger |= AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE;
+
+ if (changed_flags & AMDGPU_SVM_PTE_FLAG_MASK)
+ trigger |= AMDGPU_SVM_ATTR_TRIGGER_PTE_FLAG_CHANGE;
+ if (changed_flags & AMDGPU_SVM_MAPPING_FLAG_MASK)
+ trigger |= AMDGPU_SVM_ATTR_TRIGGER_MAPPING_FLAG_CHANGE;
+ if (prev_attrs->preferred_loc != new_attrs->preferred_loc ||
+ prev_attrs->prefetch_loc != new_attrs->prefetch_loc)
+ trigger |= AMDGPU_SVM_ATTR_TRIGGER_LOCATION_CHANGE;
+ if (prev_attrs->granularity != new_attrs->granularity)
+ trigger |= AMDGPU_SVM_ATTR_TRIGGER_GRANULARITY_CHANGE;
+
+ if (!trigger)
+ trigger = AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY;
+
+ return trigger;
+}
+
+/*
+ * attr_has_access - does the UAPI attribute list touch access state?
+ *
+ * Returns true when any entry is AMDGPU_SVM_ATTR_ACCESS or
+ * AMDGPU_SVM_ATTR_ACCESS_IN_PLACE.  Used by the set path to decide
+ * whether a change must be forced through the range layer even when the
+ * stored attributes are unchanged.
+ */
+static bool attr_has_access(uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs)
+{
+ uint32_t i;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case AMDGPU_SVM_ATTR_ACCESS:
+ case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+ return true;
+ default:
+ /* Other attribute types do not affect access state. */
+ break;
+ }
+ }
+
+ return false;
+}
+
static struct amdgpu_svm_attr_range *
attr_alloc_range(unsigned long start,
unsigned long last,
@@ -179,6 +236,388 @@ static void attr_remove_range_locked(struct amdgpu_svm_attr_tree *attr_tree,
kmem_cache_free(amdgpu_svm_attr_range_cache, range);
}
+/*
+ * amdgpu_svm_attr_change_ctx_set - record a pending attribute change.
+ *
+ * Snapshots the page interval, trigger bits and before/after attribute
+ * sets into @change so the caller can apply it to the range layer after
+ * dropping the attr tree lock.
+ */
+static void amdgpu_svm_attr_change_ctx_set(
+ struct attr_set_ctx *change,
+ unsigned long start,
+ unsigned long last,
+ uint32_t trigger,
+ const struct amdgpu_svm_attrs *prev_attrs,
+ const struct amdgpu_svm_attrs *new_attrs)
+{
+ change->start = start;
+ change->last = last;
+ change->trigger = trigger;
+ change->prev_attrs = *prev_attrs;
+ change->new_attrs = *new_attrs;
+}
+
+/*
+ * amdgpu_svm_attr_apply_change - push a recorded change to the range layer.
+ *
+ * Must be called with svm->svm_lock held for write.  A zero trigger
+ * (nothing recorded) or an attr-only trigger needs no remapping, so it
+ * short-circuits to success.  Failures from
+ * amdgpu_svm_range_apply_attr_change() are traced and returned;
+ * -EAGAIN propagates to the caller's retry logic.
+ */
+static int amdgpu_svm_attr_apply_change(
+ struct amdgpu_svm *svm,
+ const struct attr_set_ctx *change)
+{
+ int ret;
+
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ if (!change->trigger ||
+ change->trigger == AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY)
+ return 0;
+
+ ret = amdgpu_svm_range_apply_attr_change(svm, change->start, change->last,
+ change->trigger, &change->prev_attrs,
+ &change->new_attrs);
+ if (ret)
+ AMDGPU_SVM_TRACE("mapping apply failed ret=%d [0x%lx-0x%lx]-0x%lx trigger=0x%x\n",
+ ret, change->start, change->last,
+ change->last - change->start + 1,
+ change->trigger);
+
+ return ret;
+}
+
+/*
+ * attr_check_preferred_loc - validate a preferred-location value.
+ *
+ * Because one SVM instance serves exactly one GPU, any value other than
+ * SYSMEM/UNDEFINED simply names this GPU, so every value is acceptable.
+ * The original conditional returned 0 on both paths; the dead branch is
+ * dropped here — behavior is unchanged.
+ */
+static inline int attr_check_preferred_loc(uint32_t value)
+{
+ return 0;
+}
+
+/*
+ * attr_check_prefetch_loc - validate a prefetch-location value.
+ *
+ * Because one SVM instance serves one GPU, any value > 0 means "prefetch
+ * to this GPU"; the attribute is kept for KFD API compatibility.  Only
+ * AMDGPU_SVM_LOCATION_UNDEFINED is rejected.
+ */
+static inline int attr_check_prefetch_loc(uint32_t value)
+{
+ if (value == AMDGPU_SVM_LOCATION_SYSMEM)
+ return 0;
+
+ if (value == AMDGPU_SVM_LOCATION_UNDEFINED)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int attr_check_access(uint32_t value)
+{
+ if (!value || value == AMDGPU_SVM_LOCATION_UNDEFINED)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Reject any flag bit outside the supported SVM flag mask. */
+static inline int attr_check_flags(uint32_t value)
+{
+ return (value & ~AMDGPU_SVM_VALID_FLAG_MASK) ? -EINVAL : 0;
+}
+
+/* Every granularity value is accepted; the setter clamps it to 0x3f later. */
+static inline int attr_check_granularity(uint32_t value)
+{
+ return 0;
+}
+
+/*
+ * amdgpu_svm_attr_validate_range_vma - check the span is backed by
+ * ordinary (non-device) VMAs.
+ *
+ * Rejects inverted or wrapping page ranges, then walks the process's
+ * VMAs under mmap_read_lock.  Any hole in coverage, or a VMA carrying
+ * VM_IO/VM_PFNMAP/VM_MIXEDMAP, fails the whole span with -EFAULT.
+ */
+static int
+amdgpu_svm_attr_validate_range_vma(struct amdgpu_svm_attr_tree *attr_tree,
+ unsigned long start_page,
+ unsigned long last_page)
+{
+ const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
+ struct mm_struct *mm;
+ unsigned long start, end;
+ int ret = 0;
+
+ if (start_page > last_page)
+ return -EINVAL;
+
+ /* last_page + 1 below must not wrap. */
+ if (last_page == ULONG_MAX)
+ return -EINVAL;
+
+ start = start_page << PAGE_SHIFT;
+ end = (last_page + 1) << PAGE_SHIFT;
+ mm = attr_tree->svm->gpusvm.mm;
+ if (!mm)
+ return -EFAULT;
+
+ mmap_read_lock(mm);
+ while (start < end) {
+ struct vm_area_struct *vma = vma_lookup(mm, start);
+
+ if (!vma || (vma->vm_flags & device_vma)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ /* Jump to the end of this VMA (or the span end). */
+ start = min(end, vma->vm_end);
+ }
+ mmap_read_unlock(mm);
+
+ return ret;
+}
+
+/*
+ * amdgpu_svm_attr_set_validate - dispatch one UAPI attribute to its
+ * value validator.  Unknown attribute types are rejected with -EINVAL.
+ */
+static int amdgpu_svm_attr_set_validate(const struct drm_amdgpu_svm_attribute *attr)
+{
+ switch (attr->type) {
+ case AMDGPU_SVM_ATTR_PREFERRED_LOC:
+ return attr_check_preferred_loc(attr->value);
+ case AMDGPU_SVM_ATTR_PREFETCH_LOC:
+ return attr_check_prefetch_loc(attr->value);
+ case AMDGPU_SVM_ATTR_ACCESS:
+ case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+ case AMDGPU_SVM_ATTR_NO_ACCESS:
+ return attr_check_access(attr->value);
+ case AMDGPU_SVM_ATTR_SET_FLAGS:
+ case AMDGPU_SVM_ATTR_CLR_FLAGS:
+ return attr_check_flags(attr->value);
+ case AMDGPU_SVM_ATTR_GRANULARITY:
+ return attr_check_granularity(attr->value);
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * amdgpu_svm_attr_apply - fold a UAPI attribute list into @attrs.
+ *
+ * Entries are applied in order, so a later entry for the same field
+ * overrides an earlier one.  Unknown types are ignored here — they are
+ * rejected up front by amdgpu_svm_attr_set_validate().  Granularity is
+ * clamped to 0x3f.
+ */
+static void amdgpu_svm_attr_apply(struct amdgpu_svm_attrs *attrs,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *pattrs)
+{
+ const struct drm_amdgpu_svm_attribute *attr;
+
+ for (attr = pattrs; nattr--; attr++) {
+ switch (attr->type) {
+ case AMDGPU_SVM_ATTR_PREFERRED_LOC:
+ attrs->preferred_loc = (int32_t)attr->value;
+ break;
+ case AMDGPU_SVM_ATTR_PREFETCH_LOC:
+ attrs->prefetch_loc = (int32_t)attr->value;
+ break;
+ case AMDGPU_SVM_ATTR_ACCESS:
+ attrs->access = AMDGPU_SVM_ACCESS_ENABLE;
+ break;
+ case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+ attrs->access = AMDGPU_SVM_ACCESS_IN_PLACE;
+ break;
+ case AMDGPU_SVM_ATTR_NO_ACCESS:
+ attrs->access = AMDGPU_SVM_ACCESS_NONE;
+ break;
+ case AMDGPU_SVM_ATTR_SET_FLAGS:
+ attrs->flags |= attr->value;
+ break;
+ case AMDGPU_SVM_ATTR_CLR_FLAGS:
+ attrs->flags &= ~attr->value;
+ break;
+ case AMDGPU_SVM_ATTR_GRANULARITY:
+ attrs->granularity = min_t(uint32_t, attr->value, 0x3f);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * attr_same_attrs - would applying @attrs leave @range's attributes
+ * unchanged?  Applies the list to a scratch copy and compares.
+ */
+static bool attr_same_attrs(const struct amdgpu_svm_attr_range *range,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs)
+{
+ struct amdgpu_svm_attrs target;
+
+ target = range->attrs;
+ amdgpu_svm_attr_apply(&target, nattr, attrs);
+ return amdgpu_svm_attr_equal(&range->attrs, &target);
+}
+
+/*
+ * amdgpu_svm_attr_set_hole - apply attributes to an uncovered interval.
+ *
+ * [start, last] is not covered by any existing attr range.  If applying
+ * @attrs on top of the defaults changes nothing, no range is created.
+ * Otherwise a new range is inserted and the change recorded in @change
+ * for the caller to replay against the range layer.
+ *
+ * Returns 0 on success or -ENOMEM if the range allocation fails.
+ */
+static int
+amdgpu_svm_attr_set_hole(struct amdgpu_svm_attr_tree *attr_tree,
+ const struct amdgpu_svm_attrs *default_attrs,
+ unsigned long start, unsigned long last,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs,
+ struct attr_set_ctx *change)
+{
+ struct amdgpu_svm_attrs new_attrs;
+ struct amdgpu_svm_attr_range *range;
+ uint32_t trigger;
+
+ lockdep_assert_held(&attr_tree->lock);
+
+ if (start > last)
+ return 0;
+
+ /* no action if default attr */
+ new_attrs = *default_attrs;
+ amdgpu_svm_attr_apply(&new_attrs, nattr, attrs);
+ if (amdgpu_svm_attr_equal(default_attrs, &new_attrs))
+ return 0;
+
+ range = attr_alloc_range(start, last, &new_attrs);
+ if (!range)
+ return -ENOMEM;
+
+ attr_insert_range_locked(attr_tree, range);
+
+ trigger = attr_change_ctx_trigger(default_attrs, &new_attrs);
+ amdgpu_svm_attr_change_ctx_set(change, start, last, trigger,
+ default_attrs, &new_attrs);
+ return 0;
+}
+
+/*
+ * amdgpu_svm_attr_set_existing - apply attributes inside an existing range.
+ *
+ * [start, last] lies within @range.  If the result equals the stored
+ * attributes the tree is untouched (though an access-touching update on a
+ * non-XNACK SVM still records a forced ACCESS_CHANGE — see below).
+ * Otherwise the range is updated in place when the interval matches
+ * exactly, or split into up to three pieces (head keeps old attrs, middle
+ * gets new attrs, tail keeps old attrs).
+ *
+ * Returns 0 on success or -ENOMEM if a split allocation fails; on
+ * -ENOMEM the tree is left unchanged.
+ */
+static int
+amdgpu_svm_attr_set_existing(struct amdgpu_svm_attr_tree *attr_tree,
+ struct amdgpu_svm_attr_range *range,
+ unsigned long start, unsigned long last,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs,
+ struct attr_set_ctx *change)
+{
+ unsigned long range_start = attr_start_page(range);
+ unsigned long range_last = attr_last_page(range);
+ struct amdgpu_svm_attr_range *left = NULL;
+ struct amdgpu_svm_attr_range *right = NULL;
+ struct amdgpu_svm_attrs old_attrs;
+ struct amdgpu_svm_attrs new_attrs;
+ uint32_t trigger;
+ bool force_trigger;
+
+ lockdep_assert_held(&attr_tree->lock);
+
+ old_attrs = range->attrs;
+
+ /* The attr layer does not store GPU-mapped state; to align with KFD
+ * semantics, access updates must force the range layer to check
+ * whether the pages are GPU mapped.
+ */
+ force_trigger = !attr_tree->svm->xnack_enabled && attr_has_access(nattr, attrs);
+
+ if (attr_same_attrs(range, nattr, attrs)) {
+ if (!force_trigger)
+ return 0;
+
+ /* Attributes unchanged, but still replay an access check. */
+ amdgpu_svm_attr_change_ctx_set(change, start, last,
+ AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE,
+ &old_attrs, &old_attrs);
+ return 0;
+ }
+
+ new_attrs = old_attrs;
+ amdgpu_svm_attr_apply(&new_attrs, nattr, attrs);
+ trigger = attr_change_ctx_trigger(&old_attrs, &new_attrs);
+
+ /* only need to update attr */
+ if (start == range_start && last == range_last) {
+ range->attrs = new_attrs;
+ amdgpu_svm_attr_change_ctx_set(change, start, last,
+ trigger, &old_attrs, &new_attrs);
+ return 0;
+ }
+
+ /* split head */
+ if (start > range_start) {
+ left = attr_alloc_range(range_start, start - 1, &old_attrs);
+ if (!left)
+ return -ENOMEM;
+ }
+
+ /* split tail */
+ if (last < range_last) {
+ right = attr_alloc_range(last + 1, range_last, &old_attrs);
+ if (!right) {
+ if (left)
+ kmem_cache_free(amdgpu_svm_attr_range_cache, left);
+ return -ENOMEM;
+ }
+ }
+
+ /* Re-insert @range shrunk to the middle piece with the new attrs. */
+ attr_remove_range_locked(attr_tree, range, false);
+ if (left)
+ attr_insert_range_locked(attr_tree, left);
+ attr_set_interval(range, start, last);
+ range->attrs = new_attrs;
+ attr_insert_range_locked(attr_tree, range);
+ if (right)
+ attr_insert_range_locked(attr_tree, right);
+
+ amdgpu_svm_attr_change_ctx_set(change, start, last, trigger,
+ &old_attrs, &new_attrs);
+ return 0;
+}
+
+/*
+ * amdgpu_svm_attr_set_range - apply attributes segment by segment.
+ *
+ * Walks [start, last] one segment at a time: a segment is either the
+ * overlap with an existing attr range, or the hole up to the next range.
+ * Each segment's tree update runs under attr_tree->lock; the recorded
+ * change is then applied to the range layer under svm->svm_lock (write).
+ * The tree lock is dropped in between, so the tree can change between
+ * segments — the walk re-looks-up from the cursor each iteration.
+ *
+ * -EAGAIN from the apply step is collected in @need_retry and reported
+ * once the whole span has been processed; other errors abort immediately.
+ */
+static int
+amdgpu_svm_attr_set_range(struct amdgpu_svm_attr_tree *attr_tree,
+ const struct amdgpu_svm_attrs *default_attrs,
+ unsigned long start, unsigned long last,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs)
+{
+ struct amdgpu_svm *svm = attr_tree->svm;
+ unsigned long cursor = start;
+ bool need_retry = false;
+
+ while (cursor <= last) {
+ struct interval_tree_node *node;
+ unsigned long seg_last;
+ struct attr_set_ctx change = { 0 };
+ int ret;
+
+ mutex_lock(&attr_tree->lock);
+ node = interval_tree_iter_first(&attr_tree->tree, cursor, cursor);
+ if (node) {
+ struct amdgpu_svm_attr_range *range;
+
+ range = container_of(node, struct amdgpu_svm_attr_range, it_node);
+ seg_last = min(last, attr_last_page(range));
+ ret = amdgpu_svm_attr_set_existing(attr_tree, range,
+ cursor, seg_last,
+ nattr, attrs, &change);
+ } else {
+ struct interval_tree_node *next;
+
+ /* Hole: it ends at the next range's start, or at @last. */
+ seg_last = last;
+ if (cursor != ULONG_MAX) {
+ next = interval_tree_iter_first(&attr_tree->tree,
+ cursor + 1,
+ ULONG_MAX);
+ if (next) {
+ struct amdgpu_svm_attr_range *next_range;
+
+ next_range = container_of(next,
+ struct amdgpu_svm_attr_range,
+ it_node);
+ seg_last = min(last,
+ attr_start_page(next_range) - 1);
+ }
+ }
+ ret = amdgpu_svm_attr_set_hole(attr_tree,
+ default_attrs,
+ cursor, seg_last,
+ nattr, attrs,
+ &change);
+ }
+ mutex_unlock(&attr_tree->lock);
+
+ if (ret)
+ return ret;
+
+ down_write(&svm->svm_lock);
+ ret = amdgpu_svm_attr_apply_change(svm, &change);
+ up_write(&svm->svm_lock);
+
+ /* Defer -EAGAIN: finish all segments, then let the caller retry. */
+ if (ret == -EAGAIN) {
+ need_retry = true;
+ ret = 0;
+ }
+
+ if (ret)
+ return ret;
+
+ /* ULONG_MAX check avoids wrap in seg_last + 1. */
+ if (seg_last == ULONG_MAX || seg_last == last)
+ break;
+
+ cursor = seg_last + 1;
+ }
+
+ return need_retry ? -EAGAIN : 0;
+}
+
struct amdgpu_svm_attr_tree *
amdgpu_svm_attr_tree_create(struct amdgpu_svm *svm)
{
@@ -214,6 +653,115 @@ void amdgpu_svm_attr_tree_destroy(struct amdgpu_svm_attr_tree *attr_tree)
kfree(attr_tree);
}
+/*
+ * amdgpu_svm_attr_set - ioctl-level entry: set attributes on [start, start+size).
+ *
+ * Validates every attribute value, checks the span is backed by ordinary
+ * VMAs, then delegates to amdgpu_svm_attr_set_range(), retrying on
+ * -EAGAIN after flushing pending range work.
+ *
+ * NOTE(review): the retry loop is unbounded; a span that keeps failing
+ * with -EAGAIN would spin here.  Acceptable for the POC per the comment
+ * below, but needs a bound or signal check for production.
+ */
+int amdgpu_svm_attr_set(struct amdgpu_svm_attr_tree *attr_tree,
+ uint64_t start,
+ uint64_t size,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs)
+{
+ struct amdgpu_svm *svm = attr_tree->svm;
+ struct amdgpu_svm_attrs default_attrs;
+ unsigned long start_page, last_page;
+ uint32_t i;
+ int r;
+
+ start_page = start >> PAGE_SHIFT;
+ last_page = (start + size - 1) >> PAGE_SHIFT;
+
+ for (i = 0; i < nattr; i++) {
+ AMDGPU_SVM_TRACE("set attr type %u value 0x%08x for page range [%lx, %lx] xnack:%d",
+ attrs[i].type, attrs[i].value, start_page, last_page, svm->xnack_enabled ? 1 : 0);
+ r = amdgpu_svm_attr_set_validate(&attrs[i]);
+ if (r) {
+ AMDGPU_SVM_TRACE("invalid attribute %u value 0x%08x", attrs[i].type, attrs[i].value);
+ return r;
+ }
+ }
+
+ r = amdgpu_svm_attr_validate_range_vma(attr_tree, start_page, last_page);
+ if (r)
+ return r;
+
+ attr_set_default(attr_tree->svm, &default_attrs);
+
+ /*
+ * POC/WA:
+ * cannot acquire the mmap lock here because of the drm gpusvm framework
+ * design (drm_gpusvm_range_find_or_insert); the hmm operations and GPU
+ * mapping can fail, so add a retry mechanism
+ *
+ * TODO: add mmap locked flag in drm_gpusvm_ctx to acquire mmap lock in entire ioctl period
+ */
+retry:
+ r = amdgpu_svm_attr_set_range(attr_tree, &default_attrs,
+ start_page, last_page,
+ nattr, attrs);
+ if (r == -EAGAIN) {
+ AMDGPU_SVM_TRACE("attr_set retry [0x%lx-0x%lx]\n",
+ start_page, last_page);
+ amdgpu_svm_range_flush(svm);
+ cond_resched();
+ goto retry;
+ }
+
+ return r;
+}
+
+/*
+ * amdgpu_svm_attr_clear_pages - drop attribute coverage of [start, last].
+ *
+ * For each attr range overlapping the span there are four cases:
+ *  - range straddles both ends: split into head + tail (needs one alloc);
+ *  - range hangs over the left edge only: trim to the head;
+ *  - range hangs over the right edge only: trim to the tail;
+ *  - range fully inside: remove and free it.
+ *
+ * Returns 0 on success, -EINVAL for an inverted span, or -ENOMEM if the
+ * straddle-split allocation fails (already-processed ranges stay cleared).
+ */
+int amdgpu_svm_attr_clear_pages(struct amdgpu_svm_attr_tree *attr_tree,
+ unsigned long start_page,
+ unsigned long last_page)
+{
+ struct interval_tree_node *node;
+ int r = 0;
+
+ if (start_page > last_page)
+ return -EINVAL;
+
+ mutex_lock(&attr_tree->lock);
+
+ node = interval_tree_iter_first(&attr_tree->tree, start_page, last_page);
+ while (node) {
+ struct interval_tree_node *next;
+ struct amdgpu_svm_attr_range *range;
+ unsigned long range_start;
+ unsigned long range_last;
+
+ range = container_of(node, struct amdgpu_svm_attr_range, it_node);
+ /* Fetch the successor before we mutate/remove this node. */
+ next = interval_tree_iter_next(node, start_page, last_page);
+ range_start = attr_start_page(range);
+ range_last = attr_last_page(range);
+
+ if (range_start < start_page && range_last > last_page) {
+ /* Straddles both ends: keep head in @range, alloc tail. */
+ struct amdgpu_svm_attr_range *tail;
+
+ tail = attr_alloc_range(last_page + 1, range_last, &range->attrs);
+ if (!tail) {
+ r = -ENOMEM;
+ break;
+ }
+
+ attr_remove_range_locked(attr_tree, range, false);
+ attr_set_interval(range, range_start, start_page - 1);
+ attr_insert_range_locked(attr_tree, range);
+ attr_insert_range_locked(attr_tree, tail);
+ } else if (range_start < start_page) {
+ /* Overhangs the left edge: trim to the head. */
+ attr_remove_range_locked(attr_tree, range, false);
+ attr_set_interval(range, range_start, start_page - 1);
+ attr_insert_range_locked(attr_tree, range);
+ } else if (range_last > last_page) {
+ /* Overhangs the right edge: trim to the tail. */
+ attr_remove_range_locked(attr_tree, range, false);
+ attr_set_interval(range, last_page + 1, range_last);
+ attr_insert_range_locked(attr_tree, range);
+ } else {
+ /* Fully inside the span: remove and free. */
+ attr_remove_range_locked(attr_tree, range, true);
+ }
+
+ node = next;
+ }
+
+ mutex_unlock(&attr_tree->lock);
+ return r;
+}
+
static void attr_get_ctx_add(struct attr_get_ctx *ctx,
const struct amdgpu_svm_attrs *attrs)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 06/12] drm/amdgpu: add SVM range data structures
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (4 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 05/12] drm/amdgpu: implement SVM attribute set Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 07/12] drm/amdgpu: implement SVM range PTE flags and GPU mapping Honglei Huang
` (5 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Add the SVM range header:
- struct amdgpu_svm_range: extends drm_gpusvm_range with mapping state
- helper functions
- Function declarations for range work init/fini, flush, sync,
mapping, attribute change application, invalidation, and queue
stop/restore.
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h | 76 +++++++++++++++++++
1 file changed, 76 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
new file mode 100644
index 000000000..18bf3dad1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SVM_RANGE_H__
+#define __AMDGPU_SVM_RANGE_H__
+
+#include <drm/drm_gpusvm.h>
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct amdgpu_svm;
+struct amdgpu_svm_attrs;
+struct drm_gpusvm_notifier;
+struct drm_gpusvm_range;
+struct mmu_notifier_range;
+
+/*
+ * struct amdgpu_svm_range - amdgpu state wrapped around a drm_gpusvm_range.
+ * @base: embedded drm_gpusvm_range; container_of() relies on this layout.
+ * @gc_node: list link (presumably for the garbage-collect list — confirm
+ *           against the work-queue code).
+ * @gpu_mapped: true while the range is mapped in the GPU page tables;
+ *              accessed with READ_ONCE/WRITE_ONCE outside the notifier lock.
+ * @gc_queued/@restore_queued/@in_queue: work-queue bookkeeping flags
+ *           (semantics defined by the range work code, not visible here).
+ * @pending_ops: bitmask of AMDGPU_SVM_RANGE_PENDING_OP_* work.
+ * @pending_start/@pending_last: page interval the pending ops apply to.
+ * @pte_flags: PTE flags the range was last mapped with.
+ * @attr_flags: AMDGPU_SVM_FLAG_* value the range was last mapped with.
+ */
+struct amdgpu_svm_range {
+ struct drm_gpusvm_range base;
+ struct list_head gc_node;
+ bool gpu_mapped;
+ bool gc_queued;
+ bool restore_queued;
+ bool in_queue;
+ u8 pending_ops;
+ unsigned long pending_start;
+ unsigned long pending_last;
+ uint64_t pte_flags;
+ uint32_t attr_flags;
+};
+
+/* Upcast from the embedded drm_gpusvm_range to its amdgpu wrapper. */
+static inline struct amdgpu_svm_range *
+to_amdgpu_svm_range(struct drm_gpusvm_range *range)
+{
+ return container_of(range, struct amdgpu_svm_range, base);
+}
+
+int amdgpu_svm_range_work_init(struct amdgpu_svm *svm);
+void amdgpu_svm_range_work_fini(struct amdgpu_svm *svm);
+void amdgpu_svm_range_flush(struct amdgpu_svm *svm);
+void amdgpu_svm_range_sync_work(struct amdgpu_svm *svm);
+int amdgpu_svm_range_map_attr_ranges(struct amdgpu_svm *svm,
+ unsigned long start_page,
+ unsigned long last_page);
+int amdgpu_svm_range_apply_attr_change(
+ struct amdgpu_svm *svm, unsigned long start, unsigned long last,
+ uint32_t trigger, const struct amdgpu_svm_attrs *prev_attrs,
+ const struct amdgpu_svm_attrs *new_attrs);
+void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
+ struct drm_gpusvm_notifier *notifier,
+ const struct mmu_notifier_range *mmu_range);
+void amdgpu_svm_range_restore_begin_compute(struct amdgpu_svm *svm);
+void amdgpu_svm_range_restore_end_compute(struct amdgpu_svm *svm);
+
+#endif /* __AMDGPU_SVM_RANGE_H__ */
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 07/12] drm/amdgpu: implement SVM range PTE flags and GPU mapping
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (5 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 06/12] drm/amdgpu: add SVM range data structures Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 08/12] drm/amdgpu: implement SVM range notifier and invalidation Honglei Huang
` (4 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Implement the GPU page table mapping core for SVM ranges:
- PTE flag computation per GC IP version (9.4.x, 11.x, 12.x) with
coherency mode selection (UC/NC/CC/RW) based on SVM flags
- GPU PTE update helpers using amdgpu_vm_update_range with DMA
address coalescing across contiguous pagemap entries
- Range mapping loop: find_or_insert via drm_gpusvm, get_pages,
validate under notifier lock, update GPU PTEs, flush TLB
- Attribute-aware mapping: walk the attr tree to map only accessible
ranges with correct PTE flags
- Attribute change handler: detect trigger types and remap intervals
when PTE flags, mapping flags, or access state changes
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 539 ++++++++++++++++++
1 file changed, 539 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
new file mode 100644
index 000000000..b3bd4e2e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_svm.h"
+#include "amdgpu_svm_attr.h"
+#include "amdgpu_svm_range.h"
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_vm.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_pagemap.h>
+
+#include <linux/mmu_notifier.h>
+#include <uapi/linux/kfd_ioctl.h>
+
+/* Operation requested when a range is queued to the work machinery. */
+enum amdgpu_svm_range_queue_op {
+ AMDGPU_SVM_RANGE_OP_RESTORE = 0,
+ AMDGPU_SVM_RANGE_OP_UNMAP = 1,
+};
+
+/* Bitmask stored in amdgpu_svm_range.pending_ops. */
+enum amdgpu_svm_range_pending_op {
+ AMDGPU_SVM_RANGE_PENDING_OP_NONE = 0,
+ AMDGPU_SVM_RANGE_PENDING_OP_UNMAP = BIT(0),
+ AMDGPU_SVM_RANGE_PENDING_OP_RESTORE = BIT(1),
+};
+
+#define UNMAP_WORK(ops) ((ops) & AMDGPU_SVM_RANGE_PENDING_OP_UNMAP)
+
+#define RESTORE_WORK(ops) ((ops) & AMDGPU_SVM_RANGE_PENDING_OP_RESTORE)
+
+/* Without XNACK, mappings cannot be recovered on fault and must be rebuilt. */
+#define NEED_REBUILD(svm) (!(svm)->xnack_enabled)
+
+/* Actions an MMU-notifier invalidation may request. */
+enum amdgpu_svm_range_notifier_op {
+ AMDGPU_SVM_RANGE_NOTIFIER_CLEAR_PTE = BIT(0),
+ AMDGPU_SVM_RANGE_NOTIFIER_QUEUE_INTERVAL = BIT(1),
+};
+
+/* A pending-op record: which range, which page interval, which ops. */
+struct range_pending_op_ctx {
+ struct amdgpu_svm_range *range;
+ unsigned long start;
+ unsigned long last;
+ uint8_t pending_ops;
+};
+
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+#define AMDGPU_SVM_RANGE_WQ_NAME "amdgpu_svm_range"
+#define AMDGPU_SVM_RESTORE_WQ_NAME "amdgpu_svm_restore"
+
+static void
+amdgpu_svm_range_enqueue(struct amdgpu_svm *svm,
+ struct amdgpu_svm_range *range,
+ unsigned long start,
+ unsigned long last,
+ enum amdgpu_svm_range_queue_op op);
+
+/* True when the access state allows GPU access (plain or in-place). */
+static inline bool
+range_has_access(enum amdgpu_svm_attr_access access)
+{
+ switch (access) {
+ case AMDGPU_SVM_ACCESS_ENABLE:
+ case AMDGPU_SVM_ACCESS_IN_PLACE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Mark the range as no longer GPU mapped (lock-free readers use READ_ONCE). */
+static void
+range_invalidate_gpu_mapping(struct drm_gpusvm_range *range)
+{
+ WRITE_ONCE(to_amdgpu_svm_range(range)->gpu_mapped, false);
+}
+
+/*
+ * range_attr_match - fast check that the range is already mapped with the
+ * wanted PTE flags and SVM attr flags.  Uses READ_ONCE because it runs
+ * without the notifier lock; a positive answer must be re-validated under
+ * the lock (see the map path).
+ */
+static bool
+range_attr_match(struct drm_gpusvm_range *range,
+ const struct amdgpu_svm_attrs *attrs,
+ uint64_t pte_flags)
+{
+ struct amdgpu_svm_range *r = to_amdgpu_svm_range(range);
+
+ if (!READ_ONCE(r->gpu_mapped))
+ return false;
+
+ return READ_ONCE(r->pte_flags) == pte_flags &&
+ READ_ONCE(r->attr_flags) == attrs->flags;
+}
+
+/*
+ * range_pages_valid - are the range's pages still usable for mapping?
+ * Requires the gpusvm notifier lock; rejects (partially) unmapped pages
+ * before deferring to drm_gpusvm_range_pages_valid().
+ */
+static bool
+range_pages_valid(struct amdgpu_svm *svm,
+ struct drm_gpusvm_range *range)
+{
+ lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+ if (range->pages.flags.unmapped || range->pages.flags.partial_unmap)
+ return false;
+
+ return drm_gpusvm_range_pages_valid(&svm->gpusvm, range);
+}
+
+/*
+ * amdgpu_svm_range_attr_pte_flags - compute PTE flags for an attr set.
+ *
+ * WA/POC: simplified PTE-flag computation.  Picks an MTYPE per GC IP
+ * version and coherency flags, then builds the final PTE bits via
+ * amdgpu_gmc_get_vm_pte().  Pages are always READABLE and SNOOPED;
+ * WRITEABLE unless AMDGPU_SVM_FLAG_GPU_RO; EXECUTABLE only with
+ * AMDGPU_SVM_FLAG_GPU_EXEC.  System-memory mappings only (AMDGPU_PTE_SYSTEM).
+ */
+static uint64_t
+amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
+ const struct amdgpu_svm_attrs *attrs)
+{
+ /* WA/POC: a simple pte flags func */
+ uint32_t gc_ip_version = amdgpu_ip_version(svm->adev, GC_HWIP, 0);
+ uint32_t flags = attrs->flags;
+ uint32_t mapping_flags = 0;
+ uint64_t pte_flags;
+ bool coherent = flags & (AMDGPU_SVM_FLAG_COHERENT |
+ AMDGPU_SVM_FLAG_EXT_COHERENT);
+ bool ext_coherent = flags & AMDGPU_SVM_FLAG_EXT_COHERENT;
+ bool snoop = true;
+ unsigned int mtype_local;
+
+ switch (gc_ip_version) {
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ /* mtype_local honours the amdgpu_mtype_local module option
+ * unless extended coherence forces CC.
+ */
+ if (ext_coherent)
+ mtype_local = AMDGPU_VM_MTYPE_CC;
+ else
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC :
+ AMDGPU_VM_MTYPE_RW;
+ if (svm->adev->flags & AMD_IS_APU) {
+ /* Single-node APU can use the local mtype directly. */
+ if (num_possible_nodes() <= 1)
+ mapping_flags |= mtype_local;
+ else
+ mapping_flags |= ext_coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ } else {
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ break;
+ default:
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ break;
+ }
+
+ if (flags & AMDGPU_SVM_FLAG_GPU_EXEC)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM;
+ pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
+ pte_flags |= AMDGPU_PTE_IS_PTE;
+
+ amdgpu_gmc_get_vm_pte(svm->adev, svm->vm, NULL, mapping_flags, &pte_flags);
+ pte_flags |= AMDGPU_PTE_READABLE;
+ if (!(flags & AMDGPU_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
+
+ return pte_flags;
+}
+
+/*
+ * amdgpu_svm_range_lock_vm_pd - lock the VM page directory via drm_exec.
+ *
+ * On success the caller owns @exec and must call drm_exec_fini(); on
+ * failure @exec is already finalized here.
+ */
+static int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec)
+{
+ int ret;
+
+ drm_exec_init(exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(exec) {
+ ret = amdgpu_vm_lock_pd(svm->vm, exec, 1);
+ drm_exec_retry_on_contention(exec);
+ if (ret) {
+ drm_exec_fini(exec);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_svm_range_update_gpu - single-shot GPU PTE update over a page span.
+ *
+ * Locks the VM PD, issues one amdgpu_vm_update_range() call for
+ * [start_page, last_page] with @pte_flags, optionally waits for the
+ * resulting fence (@wait_fence) and optionally refreshes the PDEs
+ * (@update_pdes).  A failed fence wait is traced but still returned.
+ */
+static int
+amdgpu_svm_range_update_gpu(struct amdgpu_svm *svm, unsigned long start_page,
+ unsigned long last_page, uint64_t pte_flags,
+ dma_addr_t *pages_addr, bool flush_tlb,
+ bool update_pdes, bool wait_fence)
+{
+ struct drm_exec exec;
+ struct dma_fence *fence = NULL;
+ int ret;
+
+ ret = amdgpu_svm_range_lock_vm_pd(svm, &exec);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, false,
+ flush_tlb, true,
+ NULL, start_page, last_page, pte_flags, 0, 0,
+ NULL, pages_addr, wait_fence ? &fence : NULL);
+ if (!ret && wait_fence && fence) {
+ ret = dma_fence_wait(fence, false);
+ if (ret < 0)
+ AMDGPU_SVM_TRACE("wait unmap fence failed: ret=%d [0x%lx-0x%lx]-0x%lx\n",
+ ret, start_page, last_page,
+ last_page - start_page + 1);
+ }
+ if (!ret && update_pdes)
+ ret = amdgpu_vm_update_pdes(svm->adev, svm->vm, false);
+
+ /* dma_fence_put(NULL) is a no-op, so this covers all paths. */
+ dma_fence_put(fence);
+ drm_exec_fini(&exec);
+ return ret;
+}
+
+/*
+ * amdgpu_svm_range_update_gpu_range - write PTEs for one gpusvm range.
+ *
+ * Coalesces contiguous drm_pagemap dma_addr entries (same interconnect,
+ * physically consecutive addresses) into maximal segments and issues one
+ * amdgpu_vm_update_range() call per segment.  The TLB flush and fence
+ * collection are requested only on the final segment.
+ *
+ * Only DRM_INTERCONNECT_SYSTEM entries are supported; anything else
+ * returns -EOPNOTSUPP.  Returns 0 on success or a negative errno.
+ */
+static int
+amdgpu_svm_range_update_gpu_range(struct amdgpu_svm *svm,
+ struct drm_gpusvm_range *range,
+ uint64_t pte_flags,
+ bool flush_tlb,
+ bool wait_fence,
+ struct dma_fence **fence)
+{
+ const unsigned long range_start_page = drm_gpusvm_range_start(range) >> PAGE_SHIFT;
+ const unsigned long range_end_page = drm_gpusvm_range_end(range) >> PAGE_SHIFT;
+ const unsigned long npages = range_end_page - range_start_page;
+ unsigned long mapped_pages = 0;
+ unsigned long dma_idx = 0;
+ int ret;
+
+ /*
+ * Moved below the declarations: the kernel builds with
+ * -Wdeclaration-after-statement, so a statement may not precede
+ * the const initializers above.
+ */
+ lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+ if (!range->pages.dma_addr || !npages)
+ return -EINVAL;
+
+ while (mapped_pages < npages) {
+ const struct drm_pagemap_addr *entry = &range->pages.dma_addr[dma_idx++];
+ unsigned long seg_pages = min_t(unsigned long, 1UL << entry->order,
+ npages - mapped_pages);
+ dma_addr_t seg_addr = entry->addr;
+ unsigned long start_page, last_page;
+ bool is_last_seg;
+
+ if (entry->proto != DRM_INTERCONNECT_SYSTEM)
+ return -EOPNOTSUPP;
+
+ /* Merge following entries that continue this segment. */
+ while (mapped_pages + seg_pages < npages) {
+ const struct drm_pagemap_addr *next = &range->pages.dma_addr[dma_idx];
+ unsigned long next_pages = min_t(unsigned long,
+ 1UL << next->order,
+ npages - (mapped_pages + seg_pages));
+
+ if (next->proto != entry->proto ||
+ next->addr != seg_addr + ((dma_addr_t)seg_pages << PAGE_SHIFT))
+ break;
+
+ seg_pages += next_pages;
+ dma_idx++;
+ }
+
+ start_page = range_start_page + mapped_pages;
+ last_page = start_page + seg_pages - 1;
+ is_last_seg = mapped_pages + seg_pages == npages;
+
+ ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, false,
+ flush_tlb && is_last_seg, true, NULL,
+ start_page, last_page, pte_flags,
+ 0, seg_addr, NULL, NULL,
+ wait_fence && is_last_seg ? fence : NULL);
+ if (ret)
+ return ret;
+
+ mapped_pages += seg_pages;
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_svm_range_map() - find/insert gpusvm ranges over [start, end) and
+ * map them on the GPU.
+ *
+ * For each range: optionally get pages, lock the VM PD via drm_exec, then
+ * under the gpusvm notifier lock either validate a skipped (already-matching)
+ * mapping or write new PTEs. Page directory updates, TLB flush and the
+ * per-range bookkeeping (pte_flags/attr_flags/gpu_mapped) happen after the
+ * notifier lock is dropped but still under the PD reservation.
+ *
+ * Returns 0 on success; -EFAULT if no VMA covers addr; -EAGAIN is returned
+ * to the caller when pages were invalidated concurrently (no retry here —
+ * NOTE(review): callers appear responsible for retrying; confirm).
+ */
+static int
+amdgpu_svm_range_map(struct amdgpu_svm *svm,
+ unsigned long start,
+ unsigned long end,
+ const struct amdgpu_svm_attrs *attrs,
+ const struct drm_gpusvm_ctx *gpusvm_ctx,
+ uint64_t pte_flags)
+{
+ unsigned long addr = start;
+ int ret;
+
+ while (addr < end) {
+ struct drm_exec exec;
+ struct drm_gpusvm_ctx map_ctx;
+ struct drm_gpusvm_range *range;
+ struct dma_fence *fence = NULL;
+ unsigned long vma_start;
+ unsigned long next_addr;
+ uint64_t range_pte_flags;
+ unsigned int flags;
+ bool skip_map;
+
+ /* a gap before addr means no VMA backs this address */
+ vma_start = drm_gpusvm_find_vma_start(&svm->gpusvm, addr, end);
+ if (vma_start > addr)
+ return -EFAULT;
+
+ map_ctx = *gpusvm_ctx;
+retry:
+ range = drm_gpusvm_range_find_or_insert(&svm->gpusvm, addr,
+ vma_start, end,
+ &map_ctx);
+ if (IS_ERR(range)) {
+ ret = PTR_ERR(range);
+ /*
+ * drm_gpusvm returns -EPERM on a permission mismatch
+ * with the backing VMA; some UMD tests do not request
+ * RO for a read-only MM VMA, so retry once with
+ * read_only set.
+ */
+ if (ret == -EPERM && !map_ctx.read_only) {
+ map_ctx.read_only = true;
+ goto retry;
+ }
+ return ret;
+ }
+
+ next_addr = drm_gpusvm_range_end(range);
+ if (next_addr <= addr)
+ return -EINVAL;
+
+ /* drop WRITEABLE from the PTEs for read-only mappings */
+ range_pte_flags = map_ctx.read_only ?
+ (pte_flags & ~AMDGPU_PTE_WRITEABLE) : pte_flags;
+
+ /* fast path: range already mapped with identical attrs/flags */
+ skip_map = range_attr_match(range, attrs, range_pte_flags);
+
+ AMDGPU_SVM_TRACE("range_map: [0x%lx-0x%lx] skip=%d pte=0x%llx\n",
+ addr, next_addr, skip_map ? 1 : 0, range_pte_flags);
+
+ if (!skip_map) {
+ ret = drm_gpusvm_range_get_pages(&svm->gpusvm, range, &map_ctx);
+ if (ret)
+ return ret;
+ }
+
+ ret = amdgpu_svm_range_lock_vm_pd(svm, &exec);
+ if (ret)
+ return ret;
+
+ /* no reclaim while holding the notifier lock */
+ flags = memalloc_noreclaim_save();
+ drm_gpusvm_notifier_lock(&svm->gpusvm);
+ if (skip_map) {
+ /* slow path must validate under notifier lock */
+ if (!range_attr_match(range, attrs, range_pte_flags) ||
+ !range_pages_valid(svm, range)) {
+ range_invalidate_gpu_mapping(range);
+ ret = -EAGAIN;
+ } else {
+ ret = 0;
+ }
+ } else if (!range_pages_valid(svm, range)) {
+ /* not protected by mmap lock, maybe changed by mmu notifier */
+ ret = -EAGAIN;
+ } else {
+ ret = amdgpu_svm_range_update_gpu_range(svm, range,
+ range_pte_flags,
+ true, true, &fence);
+ }
+ drm_gpusvm_notifier_unlock(&svm->gpusvm);
+ memalloc_noreclaim_restore(flags);
+
+ /* wait for the PTE update outside the notifier lock */
+ if (!ret && fence)
+ dma_fence_wait(fence, false);
+
+ dma_fence_put(fence);
+
+ if (!ret)
+ ret = amdgpu_vm_update_pdes(svm->adev, svm->vm, false);
+ if (!ret) {
+ svm->flush_tlb(svm);
+ WRITE_ONCE(to_amdgpu_svm_range(range)->pte_flags, range_pte_flags);
+ WRITE_ONCE(to_amdgpu_svm_range(range)->attr_flags, attrs->flags);
+ WRITE_ONCE(to_amdgpu_svm_range(range)->gpu_mapped, true);
+ }
+ drm_exec_fini(&exec);
+
+ if (ret)
+ return ret;
+
+ addr = next_addr;
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_svm_range_map_interval() - map a page interval with the given attrs.
+ *
+ * Converts the inclusive [start_page, last_page] interval to byte addresses,
+ * derives the PTE flags from @attrs and delegates to amdgpu_svm_range_map().
+ * A read-only gpusvm context is used when AMDGPU_SVM_FLAG_GPU_RO is set.
+ */
+static int
+amdgpu_svm_range_map_interval(struct amdgpu_svm *svm, unsigned long start_page,
+ unsigned long last_page,
+ const struct amdgpu_svm_attrs *attrs)
+{
+ struct drm_gpusvm_ctx gpusvm_ctx = {
+ .read_only = !!(attrs->flags & AMDGPU_SVM_FLAG_GPU_RO),
+ };
+ unsigned long start = start_page << PAGE_SHIFT;
+ unsigned long end = (last_page + 1) << PAGE_SHIFT;
+ uint64_t pte_flags;
+ int ret;
+
+ pte_flags = amdgpu_svm_range_attr_pte_flags(svm, attrs);
+
+ ret = amdgpu_svm_range_map(svm, start, end, attrs, &gpusvm_ctx,
+ pte_flags);
+ if (ret)
+ AMDGPU_SVM_TRACE("map_interval failed: ret=%d [0x%lx-0x%lx)-0x%lx\n",
+ ret, start, end, end - start);
+
+ return ret;
+}
+
+/*
+ * amdgpu_svm_range_map_attr_ranges() - map an interval segment-by-segment
+ * according to the attribute tree.
+ *
+ * Walks [start_page, last_page], looking up the attrs covering each cursor
+ * position (attr tree lock held only for the lookup), and maps each segment
+ * that grants GPU access. Caller must hold svm->svm_lock for write.
+ *
+ * Returns 0 on success or the first mapping error.
+ */
+int
+amdgpu_svm_range_map_attr_ranges(struct amdgpu_svm *svm,
+ unsigned long start_page,
+ unsigned long last_page)
+{
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree;
+ unsigned long cursor = start_page;
+
+ while (cursor <= last_page) {
+ struct amdgpu_svm_attrs attrs;
+ unsigned long seg_last;
+ unsigned long next;
+ int ret;
+
+ mutex_lock(&attr_tree->lock);
+ amdgpu_svm_attr_lookup_page_locked(attr_tree, cursor, &attrs,
+ &seg_last);
+ mutex_unlock(&attr_tree->lock);
+
+ /* clamp the segment to the requested interval */
+ seg_last = min(seg_last, last_page);
+ if (range_has_access(attrs.access)) {
+ /* mapping may fail here: no VMA or access denied */
+ ret = amdgpu_svm_range_map_interval(svm, cursor, seg_last,
+ &attrs);
+ if (ret)
+ return ret;
+ }
+
+ /* stop at the end of the interval or at ULONG_MAX (no +1 overflow) */
+ if (seg_last == ULONG_MAX || seg_last == last_page)
+ break;
+
+ next = seg_last + 1;
+ if (next <= cursor)
+ break;
+ cursor = next;
+ }
+
+ return 0;
+}
+
+/*
+ * amdgpu_svm_range_apply_attr_change() - react to an attribute-tree change.
+ *
+ * Decides, based on the @trigger bits and the old/new access state, whether
+ * the GPU mapping for the inclusive page interval [start, last] must be
+ * rebuilt, and remaps it via amdgpu_svm_range_map_interval() when so.
+ * Access loss and migration are currently TODOs (no unmap performed).
+ * Caller must hold svm->svm_lock for write.
+ */
+int amdgpu_svm_range_apply_attr_change(struct amdgpu_svm *svm,
+ unsigned long start,
+ unsigned long last,
+ uint32_t trigger,
+ const struct amdgpu_svm_attrs *prev_attrs,
+ const struct amdgpu_svm_attrs *new_attrs)
+{
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ bool old_access, new_access;
+ bool update_mapping = false;
+
+ old_access = range_has_access(prev_attrs->access);
+ new_access = range_has_access(new_attrs->access);
+
+ AMDGPU_SVM_TRACE("attr change trigger=0x%x old_access=%d new_access=%d [0x%lx-0x%lx]-0x%lx, xnack=%d\n",
+ trigger, old_access, new_access, start, last, last - start + 1,
+ svm->xnack_enabled ? 1 : 0);
+
+ if (trigger & AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE) {
+ if (!new_access && old_access) {
+ /*
+ * Do nothing here to stay aligned with KFD SVM behavior.
+ * TODO: unmap ranges from the GPU that lost access.
+ */
+ AMDGPU_SVM_TRACE("skip unmap ioctl operation [0x%lx-0x%lx]-0x%lx\n",
+ start, last, last - start + 1);
+ } else if (new_access) {
+ if (NEED_REBUILD(svm) ||
+ (new_attrs->flags & AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED))
+ update_mapping = true;
+ }
+ }
+
+ /* PTE or mapping flag changes require a remap when access is granted */
+ if ((trigger & (AMDGPU_SVM_ATTR_TRIGGER_PTE_FLAG_CHANGE |
+ AMDGPU_SVM_ATTR_TRIGGER_MAPPING_FLAG_CHANGE)) &&
+ new_access)
+ update_mapping = true;
+
+ if (trigger & AMDGPU_SVM_ATTR_TRIGGER_LOCATION_CHANGE) {
+ /* TODO: add migration */
+ }
+
+ if (!update_mapping)
+ return 0;
+
+ AMDGPU_SVM_TRACE("mapping update: remap interval [0x%lx-0x%lx]-0x%lx\n",
+ start, last, last - start + 1);
+ return amdgpu_svm_range_map_interval(svm, start, last, new_attrs);
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 08/12] drm/amdgpu: implement SVM range notifier and invalidation
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (6 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 07/12] drm/amdgpu: implement SVM range PTE flags and GPU mapping Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 09/12] drm/amdgpu: implement SVM range workers Honglei Huang
` (3 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Implement MMU notifier handling and range lifecycle management:
- GPU unmap in notifier context: synchronous PTE clear with
memalloc_noreclaim protection and fence wait
- Range removal: unmap pages via drm_gpusvm, invalidate GPU mapping,
remove from gpusvm
- Overlap removal: iterate notifiers and ranges in an interval,
remove all overlapping ranges, track rebuild bounds
- Rebuild: remove overlapping ranges then remap via attr tree or
clear GPU PTEs with TLB flush
- Notifier range processing: walk ranges in a notifier for an MMU
event, clear PTEs and/or queue work depending on event type
- MMU invalidation dispatcher: classify events (unmap vs other),
determine operation (clear PTE, queue interval), trigger restore
for non-xnack mode
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 253 ++++++++++++++++++
1 file changed, 253 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
index b3bd4e2e6..eba0a52be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -114,6 +114,57 @@ range_pages_valid(struct amdgpu_svm *svm,
return drm_gpusvm_range_pages_valid(&svm->gpusvm, range);
}
+/*
+ * amdgpu_svm_range_gpu_unmap_in_notifier() - clear GPU PTEs for the part of
+ * @range overlapped by @mmu_range, from MMU-notifier context.
+ *
+ * Runs under memalloc_noreclaim to avoid reclaim recursion in the notifier,
+ * then synchronously waits for the PTE-clear fence.
+ * Returns 0 on success or a negative errno.
+ */
+static int
+amdgpu_svm_range_gpu_unmap_in_notifier(struct amdgpu_svm *svm,
+ struct drm_gpusvm_range *range,
+ const struct mmu_notifier_range *mmu_range)
+{
+ struct dma_fence *fence = NULL;
+ unsigned long start = max(drm_gpusvm_range_start(range), mmu_range->start);
+ unsigned long end = min(drm_gpusvm_range_end(range), mmu_range->end);
+ unsigned int flags;
+ int ret;
+
+ if (end <= start)
+ return 0;
+
+ /* convert byte addresses to first/last page numbers (inclusive) */
+ start >>= PAGE_SHIFT;
+ end = (end - 1) >> PAGE_SHIFT;
+
+ flags = memalloc_noreclaim_save();
+ ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, true, true, false,
+ NULL, start, end, 0, 0, 0, NULL,
+ NULL, &fence);
+ memalloc_noreclaim_restore(flags);
+
+ if (!ret && fence) {
+ ret = dma_fence_wait(fence, false);
+ if (ret < 0)
+ /* NOTE: start/end are page numbers here, not byte addresses */
+ AMDGPU_SVM_TRACE("notifier unmap fence wait failed: ret=%d [0x%lx-0x%lx]-0x%lx\n",
+ ret, start, end,
+ end - start + 1);
+ }
+
+ dma_fence_put(fence);
+ return ret;
+}
+
+/*
+ * has_always_mapped_range() - check whether any range overlapping
+ * @mmu_range in @notifier carries AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED.
+ */
+static bool
+has_always_mapped_range(
+ struct drm_gpusvm_notifier *notifier,
+ const struct mmu_notifier_range *mmu_range)
+{
+ struct drm_gpusvm_range *range = NULL;
+
+ drm_gpusvm_for_each_range(range, notifier, mmu_range->start, mmu_range->end) {
+ if (READ_ONCE(to_amdgpu_svm_range(range)->attr_flags) &
+ AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED)
+ return true;
+ }
+
+ return false;
+}
+
static uint64_t
amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
const struct amdgpu_svm_attrs *attrs)
@@ -487,6 +538,163 @@ amdgpu_svm_range_map_attr_ranges(struct amdgpu_svm *svm,
return 0;
}
+/*
+ * amdgpu_svm_range_remove() - unmap a range's pages (if still mapped),
+ * invalidate its GPU-mapping state and remove it from gpusvm.
+ * Caller must hold svm->svm_lock for write.
+ */
+static void amdgpu_svm_range_remove(struct amdgpu_svm *svm,
+ struct drm_gpusvm_range *range,
+ struct drm_gpusvm_ctx *ctx)
+{
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ /* skip the unmap if the notifier already unmapped (fully or partially) */
+ if (!range->pages.flags.unmapped && !range->pages.flags.partial_unmap)
+ drm_gpusvm_range_unmap_pages(&svm->gpusvm, range, ctx);
+
+ range_invalidate_gpu_mapping(range);
+ drm_gpusvm_range_remove(&svm->gpusvm, range);
+}
+
+/*
+ * amdgpu_svm_range_remove_overlaps() - remove every gpusvm range overlapping
+ * the inclusive page interval [start_page, last_page].
+ *
+ * Entire ranges are removed, so the removal can extend past the interval;
+ * when @rebuild_start/@rebuild_last are supplied they receive the page
+ * bounds of everything actually removed (for a later rebuild).
+ * Caller must hold svm->svm_lock for write.
+ *
+ * Returns true if at least one range was removed.
+ */
+static bool
+amdgpu_svm_range_remove_overlaps(struct amdgpu_svm *svm, unsigned long start_page,
+ unsigned long last_page,
+ unsigned long *rebuild_start,
+ unsigned long *rebuild_last)
+{
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = false,
+ };
+ unsigned long start = start_page << PAGE_SHIFT;
+ unsigned long end = (last_page + 1) << PAGE_SHIFT;
+ struct drm_gpusvm_notifier *notifier, *next_notifier;
+ bool removed = false;
+
+ if (rebuild_start && rebuild_last) {
+ *rebuild_start = ULONG_MAX;
+ *rebuild_last = 0;
+ }
+
+ /* remove overlap ranges, need to remove entire range */
+ drm_gpusvm_for_each_notifier_safe(notifier, next_notifier, &svm->gpusvm,
+ start, end) {
+ struct drm_gpusvm_range *range, *next_range;
+
+ drm_gpusvm_for_each_range_safe(range, next_range, notifier, start,
+ end) {
+ unsigned long rs = drm_gpusvm_range_start(range) >> PAGE_SHIFT;
+ unsigned long rl = (drm_gpusvm_range_end(range) >> PAGE_SHIFT) - 1;
+
+ removed = true;
+ /* record rebuild start end, first range start and last range end */
+ if (rebuild_start && rebuild_last) {
+ *rebuild_start = min(*rebuild_start, rs);
+ *rebuild_last = max(*rebuild_last, rl);
+ }
+ amdgpu_svm_range_remove(svm, range, &ctx);
+ }
+ }
+
+ return removed;
+}
+
+/*
+ * amdgpu_svm_range_rebuild_locked() - remove overlapping ranges and either
+ * remap them from the attribute tree (@rebuild == true) or clear their GPU
+ * PTEs followed by a TLB flush (@rebuild == false).
+ * Caller must hold svm->svm_lock for write.
+ */
+static int amdgpu_svm_range_rebuild_locked(struct amdgpu_svm *svm,
+ unsigned long start_page,
+ unsigned long last_page,
+ bool rebuild)
+{
+ unsigned long rebuild_start = start_page;
+ unsigned long rebuild_last = last_page;
+ bool removed;
+ int ret;
+
+ lockdep_assert_held_write(&svm->svm_lock);
+
+ AMDGPU_SVM_TRACE("remove and rebuild: [0x%lx-0x%lx] rebuild=%d\n",
+ start_page, last_page, rebuild ? 1 : 0);
+
+ removed = amdgpu_svm_range_remove_overlaps(svm, start_page, last_page,
+ &rebuild_start,
+ &rebuild_last);
+ if (!removed)
+ return 0;
+
+ /* scan rebuild start end to build the extra removed ranges */
+ if (rebuild)
+ return amdgpu_svm_range_map_attr_ranges(svm, rebuild_start,
+ rebuild_last);
+
+ /* no rebuild requested: just clear the PTEs over the removed bounds */
+ ret = amdgpu_svm_range_update_gpu(svm, rebuild_start, rebuild_last,
+ 0, NULL, true, true, true);
+ if (!ret)
+ svm->flush_tlb(svm);
+
+ return ret;
+}
+
+/*
+ * amdgpu_svm_range_process_notifier_ranges() - handle an MMU notifier event
+ * for every range in @notifier overlapping @mmu_range.
+ *
+ * Depending on @notifier_op: clears GPU PTEs (CLEAR_PTE) and/or enqueues the
+ * affected page interval for deferred work (QUEUE_INTERVAL, using @queue_op).
+ * Pages are always unmapped from gpusvm; on MMU_NOTIFY_UNMAP the range is
+ * also marked unmapped. A single TLB flush is issued at the end if any PTEs
+ * were cleared. Must be called with the gpusvm notifier lock held.
+ */
+static void
+amdgpu_svm_range_process_notifier_ranges(struct amdgpu_svm *svm,
+ struct drm_gpusvm_notifier *notifier,
+ const struct mmu_notifier_range *mmu_range,
+ uint32_t notifier_op,
+ enum amdgpu_svm_range_queue_op queue_op)
+{
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ };
+ struct drm_gpusvm_range *range = NULL;
+ bool queue_ranges = notifier_op & AMDGPU_SVM_RANGE_NOTIFIER_QUEUE_INTERVAL;
+ bool clear_pte = notifier_op & AMDGPU_SVM_RANGE_NOTIFIER_CLEAR_PTE;
+ bool is_unmap = mmu_range->event == MMU_NOTIFY_UNMAP;
+ bool has_range = false;
+
+ lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+ drm_gpusvm_for_each_range(range, notifier, mmu_range->start, mmu_range->end) {
+ has_range = true;
+ if (clear_pte) {
+ amdgpu_svm_range_gpu_unmap_in_notifier(svm, range,
+ mmu_range);
+ range_invalidate_gpu_mapping(range);
+ }
+
+ drm_gpusvm_range_unmap_pages(&svm->gpusvm, range, &ctx);
+ if (is_unmap)
+ drm_gpusvm_range_set_unmapped(range, mmu_range);
+
+ if (queue_ranges) {
+ /* queue only the overlap, as inclusive page numbers */
+ unsigned long start = max(drm_gpusvm_range_start(range),
+ mmu_range->start) >> PAGE_SHIFT;
+ unsigned long last = (min(drm_gpusvm_range_end(range),
+ mmu_range->end) - 1) >> PAGE_SHIFT;
+
+ amdgpu_svm_range_enqueue(svm, to_amdgpu_svm_range(range),
+ start, last, queue_op);
+ }
+ }
+
+ /* one TLB flush for all cleared ranges */
+ if (has_range && clear_pte)
+ svm->flush_tlb(svm);
+}
+
+/*
+ * amdgpu_svm_range_interval_has_range() - true if any gpusvm range overlaps
+ * the inclusive page interval [start_page, last_page].
+ * Caller must hold svm->svm_lock.
+ */
+static bool
+amdgpu_svm_range_interval_has_range(struct amdgpu_svm *svm,
+ unsigned long start_page,
+ unsigned long last_page)
+{
+ lockdep_assert_held(&svm->svm_lock);
+
+ unsigned long start = start_page << PAGE_SHIFT;
+ unsigned long end = (last_page + 1) << PAGE_SHIFT;
+ struct drm_gpusvm_notifier *notifier;
+
+ drm_gpusvm_for_each_notifier(notifier, &svm->gpusvm, start, end) {
+ struct drm_gpusvm_range *range = NULL;
+
+ /* any first hit is enough */
+ drm_gpusvm_for_each_range(range, notifier, start, end)
+ return true;
+ }
+
+ return false;
+}
+
int amdgpu_svm_range_apply_attr_change(struct amdgpu_svm *svm,
unsigned long start,
unsigned long last,
@@ -537,3 +745,48 @@ int amdgpu_svm_range_apply_attr_change(struct amdgpu_svm *svm,
start, last, last - start + 1);
return amdgpu_svm_range_map_interval(svm, start, last, new_attrs);
}
+
+/*
+ * amdgpu_svm_range_begin_restore() - bump the evicted-range count and invoke
+ * the begin_restore callback on the 0 -> 1 transition only.
+ */
+static void amdgpu_svm_range_begin_restore(struct amdgpu_svm *svm)
+{
+ if (atomic_inc_return(&svm->evicted_ranges) != 1)
+ return;
+
+ svm->begin_restore(svm);
+}
+
+/*
+ * amdgpu_svm_range_invalidate() - MMU invalidation dispatcher.
+ *
+ * Classifies the MMU notifier event and picks the notifier operation:
+ * - UNMAP: clear PTEs and queue the interval for GC (restore is also begun
+ *   when a rebuild is needed, i.e. non-xnack mode).
+ * - other events needing a rebuild or touching an ALWAYS_MAPPED range:
+ *   queue the interval for restore and begin restore.
+ * - otherwise: just clear the PTEs.
+ * RELEASE events, an exiting SVM, or no overlapping range are ignored.
+ */
+void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
+ struct drm_gpusvm_notifier *notifier,
+ const struct mmu_notifier_range *mmu_range)
+{
+ bool is_unmap = mmu_range->event == MMU_NOTIFY_UNMAP;
+ uint32_t op;
+ enum amdgpu_svm_range_queue_op queue_op;
+
+ if (mmu_range->event == MMU_NOTIFY_RELEASE)
+ return;
+ if (atomic_read(&svm->exiting))
+ return;
+
+ if (!drm_gpusvm_range_find(notifier, mmu_range->start,
+ mmu_range->end))
+ return;
+
+ if (is_unmap) {
+ op = AMDGPU_SVM_RANGE_NOTIFIER_CLEAR_PTE |
+ AMDGPU_SVM_RANGE_NOTIFIER_QUEUE_INTERVAL;
+ queue_op = AMDGPU_SVM_RANGE_OP_UNMAP;
+ if (NEED_REBUILD(svm))
+ amdgpu_svm_range_begin_restore(svm);
+ } else if (NEED_REBUILD(svm) ||
+ has_always_mapped_range(notifier, mmu_range)) {
+ op = AMDGPU_SVM_RANGE_NOTIFIER_QUEUE_INTERVAL;
+ queue_op = AMDGPU_SVM_RANGE_OP_RESTORE;
+ amdgpu_svm_range_begin_restore(svm);
+ } else {
+ op = AMDGPU_SVM_RANGE_NOTIFIER_CLEAR_PTE;
+ queue_op = AMDGPU_SVM_RANGE_OP_RESTORE;
+ }
+
+ amdgpu_svm_range_process_notifier_ranges(svm, notifier, mmu_range,
+ op, queue_op);
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 09/12] drm/amdgpu: implement SVM range workers
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (7 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 08/12] drm/amdgpu: implement SVM range notifier and invalidation Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 10/12] drm/amdgpu: implement SVM core initialization and fini Honglei Huang
` (2 subsequent siblings)
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
- KFD queue quiesce/resume: reuse kfd api
- GC worker: processes unmap events by clearing attributes and
rebuilding GPU mappings; queues the range into the restore queue if the rebuild fails.
- Restore worker: restore evicted ranges via attr tree lookup
- Flush/sync helpers for orderly shutdown
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 404 ++++++++++++++++++
1 file changed, 404 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
index eba0a52be..472a641fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -114,6 +114,7 @@ range_pages_valid(struct amdgpu_svm *svm,
return drm_gpusvm_range_pages_valid(&svm->gpusvm, range);
}
+
static int
amdgpu_svm_range_gpu_unmap_in_notifier(struct amdgpu_svm *svm,
struct drm_gpusvm_range *range,
@@ -246,6 +247,59 @@ amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
return pte_flags;
}
+/*
+ * POC/WA: reuse KFD APIs for queue quiesce/resume.
+ * The KFD APIs operate at process level, not GPU-VM level,
+ * so potential interaction issues need to be considered.
+ */
+
+/*
+ * amdgpu_svm_range_restore_begin_compute() - quiesce KFD queues for the SVM's
+ * mm before restoring mappings. Idempotent via kfd_queues_quiesced cmpxchg;
+ * -ESRCH (no KFD process) and other failures roll the flag back.
+ */
+void amdgpu_svm_range_restore_begin_compute(struct amdgpu_svm *svm)
+{
+ int ret;
+
+ if (!svm->gpusvm.mm)
+ return;
+
+ /* only the 0 -> 1 transition performs the quiesce */
+ if (atomic_cmpxchg(&svm->kfd_queues_quiesced, 0, 1) != 0)
+ return;
+
+ ret = kgd2kfd_quiesce_mm(svm->gpusvm.mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+ if (ret == -ESRCH) {
+ AMDGPU_SVM_TRACE("kfd quiesce skipped no KFD process\n");
+ atomic_set(&svm->kfd_queues_quiesced, 0);
+ return;
+ }
+
+ if (ret) {
+ AMDGPU_SVM_TRACE("kfd quiesce failed ret=%d\n", ret);
+ atomic_set(&svm->kfd_queues_quiesced, 0);
+ return;
+ }
+
+ AMDGPU_SVM_TRACE("kfd quiesce ret=%d\n", ret);
+}
+
+/*
+ * amdgpu_svm_range_restore_end_compute() - resume KFD queues after restore.
+ * Pairs with amdgpu_svm_range_restore_begin_compute(); only acts on the
+ * 1 -> 0 transition of kfd_queues_quiesced. -ESRCH means no KFD process.
+ */
+void amdgpu_svm_range_restore_end_compute(struct amdgpu_svm *svm)
+{
+ int ret;
+
+ if (atomic_cmpxchg(&svm->kfd_queues_quiesced, 1, 0) != 1)
+ return;
+
+ if (!svm->gpusvm.mm)
+ return;
+
+ ret = kgd2kfd_resume_mm(svm->gpusvm.mm);
+ if (ret == -ESRCH) {
+ AMDGPU_SVM_TRACE("kfd resume skipped no KFD process\n");
+ return;
+ }
+
+ if (ret)
+ AMDGPU_SVM_TRACE("kfd resume failed ret=%d\n", ret);
+ else
+ AMDGPU_SVM_TRACE("kfd resume ret=%d\n", ret);
+}
+
static int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec)
{
int ret;
@@ -746,6 +800,169 @@ int amdgpu_svm_range_apply_attr_change(struct amdgpu_svm *svm,
return amdgpu_svm_range_map_interval(svm, start, last, new_attrs);
}
+/*
+ * range_dequeue_locked() - pop the first range from @work_list into @op_ctx.
+ *
+ * Clears the corresponding queued flag (restore vs gc per @restore_queue),
+ * snapshots the pending interval/ops into @op_ctx and resets the range's
+ * pending state. Caller must hold svm->gc_lock.
+ *
+ * Returns true if a range was dequeued, false if the list was empty.
+ */
+static bool
+range_dequeue_locked(struct amdgpu_svm *svm,
+ struct list_head *work_list,
+ bool restore_queue,
+ struct range_pending_op_ctx *op_ctx)
+{
+ struct amdgpu_svm_range *range;
+
+ lockdep_assert_held(&svm->gc_lock);
+
+ range = list_first_entry_or_null(work_list, struct amdgpu_svm_range,
+ gc_node);
+ if (!range)
+ return false;
+
+ list_del_init(&range->gc_node);
+ if (restore_queue)
+ range->restore_queued = false;
+ else
+ range->gc_queued = false;
+
+ op_ctx->range = range;
+ op_ctx->start = range->pending_start;
+ op_ctx->last = range->pending_last;
+ op_ctx->pending_ops = range->pending_ops;
+
+ /* reset pending state: start > last means "empty interval" */
+ range->pending_start = ULONG_MAX;
+ range->pending_last = 0;
+ range->pending_ops = AMDGPU_SVM_RANGE_PENDING_OP_NONE;
+
+ return true;
+}
+
+/*
+ * range_requeue_restore_locked() - merge [start, last] back into the range's
+ * pending restore interval and re-add it to the restore work list if it is
+ * not already queued anywhere. Caller must hold svm->gc_lock.
+ */
+static void
+range_requeue_restore_locked(struct amdgpu_svm *svm,
+ struct amdgpu_svm_range *range,
+ unsigned long start,
+ unsigned long last)
+{
+ lockdep_assert_held(&svm->gc_lock);
+
+ range->pending_start = min(range->pending_start, start);
+ range->pending_last = max(range->pending_last, last);
+ range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+
+ if (!range->gc_queued && !range->restore_queued) {
+ list_add_tail(&range->gc_node, &svm->restore_work_list);
+ range->restore_queued = true;
+ }
+}
+
+/*
+ * range_try_dequeue() - clear in_queue if the range is fully idle: not on
+ * either work list, empty pending interval (start > last) and no pending ops.
+ * Returns true when the caller should drop the queue reference.
+ */
+static bool
+range_try_dequeue(struct amdgpu_svm_range *range)
+{
+ if (!range->in_queue)
+ return false;
+
+ if (range->gc_queued || range->restore_queued ||
+ range->pending_start <= range->pending_last ||
+ range->pending_ops != AMDGPU_SVM_RANGE_PENDING_OP_NONE)
+ return false;
+
+ range->in_queue = false;
+ return true;
+}
+
+/*
+ * range_put_if_dequeued() - drop the queue-held reference on @range if it
+ * became idle (range_try_dequeue under gc_lock). The put itself happens
+ * outside the spinlock.
+ */
+static void
+range_put_if_dequeued(struct amdgpu_svm *svm,
+ struct amdgpu_svm_range *range)
+{
+ bool dequeue;
+
+ spin_lock(&svm->gc_lock);
+ dequeue = range_try_dequeue(range);
+ spin_unlock(&svm->gc_lock);
+
+ if (dequeue)
+ drm_gpusvm_range_put(&range->base);
+}
+
+/*
+ * amdgpu_svm_range_enqueue() - queue deferred work for a range interval.
+ *
+ * Takes a gpusvm reference on first enqueue (in_queue), merges [start, last]
+ * into the pending interval and accumulates pending ops from @op. UNMAP work
+ * takes precedence: a range waiting for restore is moved to the GC list.
+ * The matching work item (gc_work / delayed restore_work) is kicked outside
+ * the gc_lock. No-op when the SVM is exiting.
+ */
+static void
+amdgpu_svm_range_enqueue(struct amdgpu_svm *svm,
+ struct amdgpu_svm_range *range,
+ unsigned long start,
+ unsigned long last,
+ enum amdgpu_svm_range_queue_op op)
+{
+ bool queue_gc_work = false;
+ bool queue_restore_work = false;
+
+ if (atomic_read(&svm->exiting))
+ return;
+
+ spin_lock(&svm->gc_lock);
+ if (!range->in_queue) {
+ /* reference dropped by range_put_if_dequeued() once idle */
+ drm_gpusvm_range_get(&range->base);
+ range->in_queue = true;
+ }
+
+ range->pending_start = min(range->pending_start, start);
+ range->pending_last = max(range->pending_last, last);
+
+ switch (op) {
+ case AMDGPU_SVM_RANGE_OP_UNMAP:
+ range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_UNMAP;
+ if (NEED_REBUILD(svm))
+ range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+ break;
+ case AMDGPU_SVM_RANGE_OP_RESTORE:
+ range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+ break;
+ }
+
+ if (UNMAP_WORK(range->pending_ops)) {
+ /* unmap wins: migrate from the restore list to the GC list */
+ if (range->restore_queued) {
+ list_move_tail(&range->gc_node, &svm->gc_list);
+ range->restore_queued = false;
+ range->gc_queued = true;
+ } else if (!range->gc_queued) {
+ list_add_tail(&range->gc_node, &svm->gc_list);
+ range->gc_queued = true;
+ }
+ queue_gc_work = true;
+ } else if (RESTORE_WORK(range->pending_ops)) {
+ if (!range->gc_queued && !range->restore_queued) {
+ list_add_tail(&range->gc_node, &svm->restore_work_list);
+ range->restore_queued = true;
+ }
+ queue_restore_work = true;
+ }
+
+ spin_unlock(&svm->gc_lock);
+
+ if (queue_gc_work)
+ queue_work(svm->gc_wq, &svm->gc_work);
+ if (queue_restore_work)
+ queue_delayed_work(svm->restore_wq, &svm->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+}
+
+/*
+ * amdgpu_svm_range_process_unmap_interval() - GC-side unmap handling.
+ *
+ * Under svm_lock (write): clears the attribute tree over [start, last] and,
+ * if any gpusvm range still overlaps the interval, removes/rebuilds it via
+ * amdgpu_svm_range_rebuild_locked(). Returns the rebuild result.
+ */
+static int
+amdgpu_svm_range_process_unmap_interval(struct amdgpu_svm *svm,
+ unsigned long start, unsigned long last,
+ bool rebuild)
+{
+ int ret = 0;
+
+ down_write(&svm->svm_lock);
+ /* clean attrs */
+ amdgpu_svm_attr_clear_pages(svm->attr_tree, start, last);
+
+ /* rebuild if needed */
+ if (amdgpu_svm_range_interval_has_range(svm, start, last))
+ ret = amdgpu_svm_range_rebuild_locked(svm, start, last, rebuild);
+
+ up_write(&svm->svm_lock);
+
+ AMDGPU_SVM_TRACE("work=UNMAP ret=%d start=0x%lx last=0x%lx rebuild=%d\n",
+ ret, start, last, rebuild ? 1 : 0);
+
+ return ret;
+}
+
static void amdgpu_svm_range_begin_restore(struct amdgpu_svm *svm)
{
if (atomic_inc_return(&svm->evicted_ranges) != 1)
@@ -754,6 +971,121 @@ static void amdgpu_svm_range_begin_restore(struct amdgpu_svm *svm)
svm->begin_restore(svm);
}
+/*
+ * amdgpu_svm_range_restore_worker() - delayed work that remaps evicted
+ * intervals from the attribute tree.
+ *
+ * Drains restore_work_list, remapping each dequeued interval under
+ * svm_lock (write); failed intervals are requeued and the work rescheduled.
+ * When no work is pending anymore, the evicted_ranges counter is reset with
+ * cmpxchg against the value recorded on entry (under the notifier lock, so
+ * concurrent invalidations requeue rather than get lost) and end_restore()
+ * is invoked. Otherwise the work reschedules itself.
+ *
+ * Fix vs. original: the retry trace printed "ret=%d" twice with @ret passed
+ * twice; the duplicate specifier and argument are removed.
+ */
+static void amdgpu_svm_range_restore_worker(struct work_struct *w)
+{
+ struct delayed_work *dwork = to_delayed_work(w);
+ struct amdgpu_svm *svm = container_of(dwork, struct amdgpu_svm, restore_work);
+ unsigned long resched_delay =
+ max_t(unsigned long, 1,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ struct range_pending_op_ctx op_ctx;
+ int evicted_record;
+ bool need_resched = false;
+ bool has_pending;
+ int ret;
+
+ if (atomic_read(&svm->exiting))
+ return;
+
+ evicted_record = atomic_read(&svm->evicted_ranges);
+ if (!evicted_record)
+ return;
+
+ /* no mm left: nothing to restore, just close the restore window */
+ if (!svm->gpusvm.mm) {
+ atomic_set(&svm->evicted_ranges, 0);
+ svm->end_restore(svm);
+ return;
+ }
+
+ spin_lock(&svm->gc_lock);
+ while (range_dequeue_locked(svm, &svm->restore_work_list,
+ true, &op_ctx)) {
+ spin_unlock(&svm->gc_lock);
+
+ down_write(&svm->svm_lock);
+ ret = amdgpu_svm_range_map_attr_ranges(svm, op_ctx.start,
+ op_ctx.last);
+ up_write(&svm->svm_lock);
+
+ if (ret) {
+ AMDGPU_SVM_TRACE("restore work retry ret=%d start=0x%lx last=0x%lx\n",
+ ret, op_ctx.start, op_ctx.last);
+ spin_lock(&svm->gc_lock);
+ range_requeue_restore_locked(svm, op_ctx.range,
+ op_ctx.start, op_ctx.last);
+ spin_unlock(&svm->gc_lock);
+ need_resched = true;
+ }
+
+ range_put_if_dequeued(svm, op_ctx.range);
+ spin_lock(&svm->gc_lock);
+ }
+ spin_unlock(&svm->gc_lock);
+
+ spin_lock(&svm->gc_lock);
+ has_pending = !list_empty(&svm->restore_work_list) ||
+ !list_empty(&svm->gc_list);
+ spin_unlock(&svm->gc_lock);
+
+ if (!need_resched && !has_pending) {
+
+ /* re-check under the notifier lock to close the race with
+ * concurrent invalidations before declaring restore done */
+ drm_gpusvm_notifier_lock(&svm->gpusvm);
+ spin_lock(&svm->gc_lock);
+
+ has_pending = !list_empty(&svm->restore_work_list) || !list_empty(&svm->gc_list);
+
+ spin_unlock(&svm->gc_lock);
+
+ if (!has_pending &&
+ atomic_cmpxchg(&svm->evicted_ranges, evicted_record, 0) == evicted_record) {
+
+ drm_gpusvm_notifier_unlock(&svm->gpusvm);
+ svm->end_restore(svm);
+ return;
+
+ }
+ drm_gpusvm_notifier_unlock(&svm->gpusvm);
+ }
+
+ queue_delayed_work(svm->restore_wq, &svm->restore_work, resched_delay);
+}
+
+/*
+ * amdgpu_svm_range_gc_worker() - work item draining the GC list.
+ *
+ * For each dequeued interval: processes UNMAP ops (clear attrs, rebuild).
+ * If a RESTORE op is also pending, either just kicks the restore work
+ * (rebuild already done successfully) or re-enqueues the interval as a
+ * restore op (rebuild failed or not needed). Queue reference is dropped
+ * via range_put_if_dequeued() once the range is idle.
+ */
+static void amdgpu_svm_range_gc_worker(struct work_struct *w)
+{
+ struct amdgpu_svm *svm = container_of(w, struct amdgpu_svm, gc_work);
+ struct range_pending_op_ctx op_ctx;
+
+ spin_lock(&svm->gc_lock);
+ while (range_dequeue_locked(svm, &svm->gc_list,
+ false, &op_ctx)) {
+ int ret = 0;
+
+ spin_unlock(&svm->gc_lock);
+
+ if (UNMAP_WORK(op_ctx.pending_ops))
+ ret = amdgpu_svm_range_process_unmap_interval(svm,
+ op_ctx.start, op_ctx.last,
+ NEED_REBUILD(svm));
+
+ if (RESTORE_WORK(op_ctx.pending_ops)) {
+ /* queue into restore wq, if rebuild failed */
+ if (NEED_REBUILD(svm) && !ret)
+ queue_delayed_work(svm->restore_wq,
+ &svm->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ else
+ amdgpu_svm_range_enqueue(svm, op_ctx.range,
+ op_ctx.start,
+ op_ctx.last,
+ AMDGPU_SVM_RANGE_OP_RESTORE);
+ }
+
+ range_put_if_dequeued(svm, op_ctx.range);
+ spin_lock(&svm->gc_lock);
+ }
+ spin_unlock(&svm->gc_lock);
+}
+
void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
@@ -790,3 +1122,75 @@ void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
amdgpu_svm_range_process_notifier_ranges(svm, notifier, mmu_range,
op, queue_op);
}
+
+/*
+ * amdgpu_svm_range_work_init() - allocate the GC and restore workqueues and
+ * initialize locks, lists and work items. Restore uses an ordered queue so
+ * restore passes serialize. Returns 0 or -ENOMEM (gc_wq freed on failure).
+ */
+int amdgpu_svm_range_work_init(struct amdgpu_svm *svm)
+{
+ svm->gc_wq = alloc_workqueue(AMDGPU_SVM_RANGE_WQ_NAME,
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+ if (!svm->gc_wq)
+ return -ENOMEM;
+
+ svm->restore_wq = alloc_ordered_workqueue(AMDGPU_SVM_RESTORE_WQ_NAME,
+ WQ_HIGHPRI | WQ_MEM_RECLAIM);
+ if (!svm->restore_wq) {
+ destroy_workqueue(svm->gc_wq);
+ svm->gc_wq = NULL;
+ return -ENOMEM;
+ }
+
+ init_rwsem(&svm->svm_lock);
+ spin_lock_init(&svm->gc_lock);
+ INIT_LIST_HEAD(&svm->gc_list);
+ INIT_LIST_HEAD(&svm->restore_work_list);
+ INIT_WORK(&svm->gc_work, amdgpu_svm_range_gc_worker);
+ INIT_DELAYED_WORK(&svm->restore_work, amdgpu_svm_range_restore_worker);
+
+ return 0;
+}
+
+/*
+ * amdgpu_svm_range_flush() - flush GC then restore work.
+ * gc_work is flushed a second time afterwards — presumably because pending
+ * work can re-queue it in between; TODO(review): confirm the requeue path.
+ */
+void amdgpu_svm_range_flush(struct amdgpu_svm *svm)
+{
+ flush_work(&svm->gc_work);
+ flush_delayed_work(&svm->restore_work);
+ flush_work(&svm->gc_work);
+}
+
+/*
+ * amdgpu_svm_range_sync_work() - flush the work items and then drain both
+ * workqueues completely for an orderly shutdown.
+ */
+void amdgpu_svm_range_sync_work(struct amdgpu_svm *svm)
+{
+ amdgpu_svm_range_flush(svm);
+ flush_workqueue(svm->gc_wq);
+ flush_workqueue(svm->restore_wq);
+}
+
+/*
+ * amdgpu_svm_range_clean_queue() - drop all entries from @work_list without
+ * processing them, releasing each range's queue reference once idle.
+ * Used during teardown after the workers have been stopped.
+ */
+static void
+amdgpu_svm_range_clean_queue(struct amdgpu_svm *svm,
+ struct list_head *work_list,
+ bool restore_queue)
+{
+ struct range_pending_op_ctx op_ctx;
+
+ spin_lock(&svm->gc_lock);
+ while (range_dequeue_locked(svm, work_list,
+ restore_queue, &op_ctx)) {
+ spin_unlock(&svm->gc_lock);
+ range_put_if_dequeued(svm, op_ctx.range);
+ spin_lock(&svm->gc_lock);
+ }
+ spin_unlock(&svm->gc_lock);
+}
+
+/*
+ * amdgpu_svm_range_work_fini() - stop workers, discard queued work, resume
+ * any still-quiesced KFD queues via end_restore(), and destroy both
+ * workqueues. Counterpart of amdgpu_svm_range_work_init().
+ */
+void amdgpu_svm_range_work_fini(struct amdgpu_svm *svm)
+{
+ cancel_delayed_work_sync(&svm->restore_work);
+ flush_work(&svm->gc_work);
+ amdgpu_svm_range_clean_queue(svm, &svm->gc_list, false);
+ amdgpu_svm_range_clean_queue(svm, &svm->restore_work_list, true);
+ atomic_set(&svm->evicted_ranges, 0);
+ /* don't leave KFD queues quiesced across teardown */
+ if (atomic_read(&svm->kfd_queues_quiesced))
+ svm->end_restore(svm);
+
+ destroy_workqueue(svm->restore_wq);
+ svm->restore_wq = NULL;
+ destroy_workqueue(svm->gc_wq);
+ svm->gc_wq = NULL;
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 10/12] drm/amdgpu: implement SVM core initialization and fini
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (8 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 09/12] drm/amdgpu: implement SVM range workers Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 11/12] drm/amdgpu: implement SVM ioctl and fault handler Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 12/12] drm/amdgpu: wire up SVM build system " Honglei Huang
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
- kmem_cache management for amdgpu_svm_range
- Reference counting: kref-based release for async safety
- XNACK helper.
- TLB flush helper for compute mode
- amdgpu_svm_init_with_ops: allocate SVM context, initialize
attr tree, work queues, and drm_gpusvm with configurable
chunk sizes and notifier size
- amdgpu_svm_init/close/fini: public lifecycle API
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c | 270 ++++++++++++++++++++++++
1 file changed, 270 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
new file mode 100644
index 000000000..aa40e1126
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_file.h>
+
+#include "amdgpu.h"
+#include "amdgpu_svm.h"
+#include "amdgpu_svm_attr.h"
+#include "amdgpu_svm_range.h"
+#include "amdgpu_vm.h"
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_SVM)
+
+/* Per-ioctl cap on the number of attributes userspace may pass. */
+#define AMDGPU_SVM_MAX_ATTRS 64
+/* Default GPUSVM notifier granularity, in MiB (scaled by SZ_1M at init). */
+#define AMDGPU_SVM_DEFAULT_SVM_NOTIFIER_SIZE 512
+
+/*
+ * Chunk sizes handed to drm_gpusvm_init(); presumably tried largest-first
+ * by the GPUSVM core — confirm against drm_gpusvm documentation.
+ */
+static const unsigned long amdgpu_svm_chunk_sizes[] = {
+ SZ_2M,
+ SZ_64K,
+ SZ_4K,
+};
+
+/* Slab cache for struct amdgpu_svm_range; created once, shared by all VMs. */
+static struct kmem_cache *amdgpu_svm_range_cache;
+
+/*
+ * GPUSVM .invalidate hook: forward MMU-notifier invalidations to the amdgpu
+ * range code (see amdgpu_svm_range_invalidate for the actual handling).
+ */
+static void amdgpu_svm_invalidate(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_notifier *notifier,
+ const struct mmu_notifier_range *mmu_range)
+{
+ amdgpu_svm_range_invalidate(to_amdgpu_svm(gpusvm), notifier, mmu_range);
+}
+
+/*
+ * GPUSVM .range_alloc hook: allocate a zeroed amdgpu_svm_range from the slab
+ * cache and return the embedded drm_gpusvm_range base, or NULL on OOM.
+ */
+static struct drm_gpusvm_range *amdgpu_svm_range_alloc(struct drm_gpusvm *gpusvm)
+{
+ struct amdgpu_svm_range *range;
+
+ range = kmem_cache_zalloc(amdgpu_svm_range_cache, GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ INIT_LIST_HEAD(&range->gc_node);
+ /* ULONG_MAX = sentinel for "no pending start page"; presumably consumed
+ * by the deferred range workers — confirm against amdgpu_svm_range.c.
+ */
+ range->pending_start = ULONG_MAX;
+ return &range->base;
+}
+
+/* GPUSVM .range_free hook: return a range allocated by amdgpu_svm_range_alloc(). */
+static void amdgpu_svm_range_free(struct drm_gpusvm_range *range)
+{
+ kmem_cache_free(amdgpu_svm_range_cache, to_amdgpu_svm_range(range));
+}
+
+/* Driver callbacks plugged into the DRM GPUSVM core at init time. */
+static const struct drm_gpusvm_ops amdgpu_gpusvm_ops = {
+ .range_alloc = amdgpu_svm_range_alloc,
+ .range_free = amdgpu_svm_range_free,
+ .invalidate = amdgpu_svm_invalidate,
+};
+
+/* kref release callback: last reference dropped, free the SVM context. */
+static void amdgpu_svm_release(struct kref *ref)
+{
+ kfree(container_of(ref, struct amdgpu_svm, refcount));
+}
+
+/* Drop a reference on @svm (e.g. one taken by PASID lookup); NULL-tolerant. */
+static void amdgpu_svm_put(struct amdgpu_svm *svm)
+{
+ if (svm)
+ kref_put(&svm->refcount, amdgpu_svm_release);
+}
+
+/*
+ * Create the global SVM slab caches (range cache + attribute cache).
+ * Idempotent: a second call after success returns 0 immediately.
+ *
+ * Returns: 0 on success, -ENOMEM or the attr-cache error on failure.
+ */
+int amdgpu_svm_cache_init(void)
+{
+ int ret;
+
+ if (amdgpu_svm_range_cache)
+ return 0;
+
+ amdgpu_svm_range_cache = AMDGPU_SVM_KMEM_CACHE_CREATE("amdgpu_svm_range_cache",
+ struct amdgpu_svm_range);
+ if (!amdgpu_svm_range_cache)
+ return -ENOMEM;
+
+ ret = amdgpu_svm_attr_cache_init();
+ if (ret) {
+ /* Only undo what this function set up: the attr cache init
+ * failed, so there is nothing of it to fini here.  Reset the
+ * range-cache pointer so the idempotence guard above does not
+ * see a stale pointer on a later retry (assumes the DESTROY
+ * macro does not already NULL it — confirm).
+ */
+ AMDGPU_SVM_KMEM_CACHE_DESTROY(amdgpu_svm_range_cache);
+ amdgpu_svm_range_cache = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Destroy the global SVM slab caches; no-op if they were never created.
+ *
+ * NOTE(review): amdgpu_svm_range_cache is not reset to NULL here unless
+ * AMDGPU_SVM_KMEM_CACHE_DESTROY does so internally; if it does not, a later
+ * amdgpu_svm_cache_init() would see the stale pointer and return success
+ * with a destroyed cache — confirm the macro's behavior.
+ */
+void amdgpu_svm_cache_fini(void)
+{
+ if (!amdgpu_svm_range_cache)
+ return;
+
+ amdgpu_svm_attr_cache_fini();
+ AMDGPU_SVM_KMEM_CACHE_DESTROY(amdgpu_svm_range_cache);
+}
+
+/*
+ * Decide whether XNACK (retryable GPU page faults) defaults to enabled:
+ * - GC IP older than 9.0.1: never.
+ * - SR-IOV without host XNACK support: never.
+ * - The listed GFX9 derivatives (9.4.2/9.4.3/9.4.4/9.5.0): always.
+ * - GC IP 10.1.1 and newer: never.
+ * - Remaining GFX9 parts: only when retry faults are on (!gmc.noretry).
+ */
+static bool amdgpu_svm_default_xnack_enabled(struct amdgpu_device *adev)
+{
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+ if (gc_ver < IP_VERSION(9, 0, 1))
+ return false;
+ if (!amdgpu_sriov_xnack_support(adev))
+ return false;
+
+ switch (gc_ver) {
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ return true;
+ default:
+ break;
+ }
+ if (gc_ver >= IP_VERSION(10, 1, 1))
+ return false;
+ return !adev->gmc.noretry;
+}
+
+/*
+ * flush_tlb hook for compute VMs: heavyweight TLB flush of this SVM's VM
+ * across the device's XCC mask.
+ */
+static void amdgpu_svm_flush_tlb_compute(struct amdgpu_svm *svm)
+{
+ amdgpu_vm_flush_compute_tlb(svm->adev, svm->vm, TLB_FLUSH_HEAVYWEIGHT,
+ svm->adev->gfx.xcc_mask);
+}
+
+/*
+ * Allocate and initialize the per-VM SVM context with caller-supplied
+ * restore/TLB-flush callbacks, then register it with the DRM GPUSVM core.
+ * Idempotent: returns 0 immediately if @vm already has an SVM context.
+ *
+ * @begin_restore/@end_restore: quiesce/resume hooks invoked around range
+ * restoration (compute variants are provided below).
+ * @flush_tlb: TLB flush hook invoked after mapping changes.
+ *
+ * Returns: 0 on success or a negative error code; on failure nothing is
+ * attached to @vm and all partial state is unwound.
+ */
+static int amdgpu_svm_init_with_ops(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ void (*begin_restore)(struct amdgpu_svm *),
+ void (*end_restore)(struct amdgpu_svm *),
+ void (*flush_tlb)(struct amdgpu_svm *))
+{
+ struct amdgpu_svm *svm;
+ int ret;
+
+ if (vm->svm)
+ return 0;
+
+ /* Global slab caches; safe to call repeatedly. */
+ ret = amdgpu_svm_cache_init();
+ if (ret)
+ return ret;
+
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm)
+ return -ENOMEM;
+
+ kref_init(&svm->refcount);
+ svm->adev = adev;
+ svm->vm = vm;
+
+ /* Granularity is clamped to 6 bits (max 2^63-byte order field). */
+ svm->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x3f);
+ svm->xnack_enabled = amdgpu_svm_default_xnack_enabled(adev);
+ svm->xnack_enabled = false; /* WA/POC: force-disable XNACK for now */
+ svm->begin_restore = begin_restore;
+ svm->end_restore = end_restore;
+ svm->flush_tlb = flush_tlb;
+ atomic_set(&svm->kfd_queues_quiesced, 0);
+ atomic_set(&svm->evicted_ranges, 0);
+ atomic_set(&svm->exiting, 0);
+
+ ret = amdgpu_svm_range_work_init(svm);
+ if (ret)
+ goto err_free;
+
+ svm->attr_tree = amdgpu_svm_attr_tree_create(svm);
+ if (!svm->attr_tree) {
+ ret = -ENOMEM;
+ goto err_range_work_fini;
+ }
+
+ /* Mirror the whole GPU VA space [0, max_pfn) for the current mm. */
+ ret = drm_gpusvm_init(&svm->gpusvm, "AMDGPU SVM",
+ adev_to_drm(adev), current->mm, 0,
+ adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_SVM_DEFAULT_SVM_NOTIFIER_SIZE * SZ_1M,
+ &amdgpu_gpusvm_ops,
+ amdgpu_svm_chunk_sizes,
+ ARRAY_SIZE(amdgpu_svm_chunk_sizes));
+
+ if (ret)
+ goto err_attr_tree_destroy;
+
+ drm_gpusvm_driver_set_lock(&svm->gpusvm, &svm->svm_lock);
+ vm->svm = svm;
+ return 0;
+
+err_attr_tree_destroy:
+ amdgpu_svm_attr_tree_destroy(svm->attr_tree);
+err_range_work_fini:
+ amdgpu_svm_range_work_fini(svm);
+err_free:
+ kfree(svm);
+ return ret;
+}
+
+/* Compute-VM flavor: wire in the KFD-style quiesce/resume and TLB hooks. */
+static int amdgpu_svm_init_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ return amdgpu_svm_init_with_ops(adev, vm,
+ amdgpu_svm_range_restore_begin_compute,
+ amdgpu_svm_range_restore_end_compute,
+ amdgpu_svm_flush_tlb_compute);
+}
+
+/*
+ * Public SVM init entry point for a VM.
+ *
+ * Returns: 0 on success or a negative error code.
+ */
+int amdgpu_svm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ /* Graphics SVM init may differ; route everything through the
+ * compute path for now.
+ */
+ return amdgpu_svm_init_compute(adev, vm);
+}
+
+/*
+ * Begin SVM teardown for @vm: mark the context as exiting and flush
+ * outstanding range work.  The atomic_xchg guard makes this run at most
+ * once, so calling it again (e.g. from amdgpu_svm_fini) is a no-op.
+ */
+void amdgpu_svm_close(struct amdgpu_vm *vm)
+{
+ if (!vm->svm)
+ return;
+
+ if (atomic_xchg(&vm->svm->exiting, 1))
+ return;
+
+ amdgpu_svm_range_sync_work(vm->svm);
+}
+
+/*
+ * Final SVM teardown for @vm: close (idempotent), tear down the GPUSVM core
+ * under the write lock, stop the workers, destroy the attribute tree, detach
+ * from the VM and drop the init reference.  The kref means the struct itself
+ * is only freed once concurrent PASID-lookup users have also dropped theirs.
+ */
+void amdgpu_svm_fini(struct amdgpu_vm *vm)
+{
+ struct amdgpu_svm *svm = vm->svm;
+
+ if (!svm)
+ return;
+
+ amdgpu_svm_close(vm);
+ down_write(&svm->svm_lock);
+ drm_gpusvm_fini(&svm->gpusvm);
+ up_write(&svm->svm_lock);
+
+ amdgpu_svm_range_work_fini(svm);
+ amdgpu_svm_attr_tree_destroy(svm->attr_tree);
+ vm->svm = NULL;
+ amdgpu_svm_put(svm);
+}
+
+/* SVM is enabled for @vm iff an SVM context has been attached to it. */
+bool amdgpu_svm_is_enabled(struct amdgpu_vm *vm)
+{
+ return !!vm->svm;
+}
+
+#endif /* CONFIG_DRM_AMDGPU_SVM */
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 11/12] drm/amdgpu: implement SVM ioctl and fault handler
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (9 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 10/12] drm/amdgpu: implement SVM core initialization and fini Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
2026-04-20 12:07 ` [RFC/POC PATCH 12/12] drm/amdgpu: wire up SVM build system " Honglei Huang
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Add the userspace and fault entry points for the SVM implementation:
- amdgpu_svm_lookup_by_pasid: look up SVM context from PASID via
vm_manager.pasids xarray with kref protection for async safety
- amdgpu_gem_svm_ioctl: ioctl handler that copies attributes from
userspace, validates page alignment and range, dispatches to
set_attr or get_attr, and copies results back for GET operations
- amdgpu_svm_handle_fault: GPU page fault handler that looks up
SVM by PASID, checks xnack and exiting state, then maps the
faulting page range via the attribute tree under svm_lock
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c | 160 ++++++++++++++++++++++++
1 file changed, 160 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
index aa40e1126..57103a140 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.c
@@ -89,6 +89,24 @@ static void amdgpu_svm_put(struct amdgpu_svm *svm)
kref_put(&svm->refcount, amdgpu_svm_release);
}
+/*
+ * Look up the SVM context for @pasid via the vm_manager PASID xarray.
+ * Takes a reference on success; the caller must amdgpu_svm_put() it.
+ *
+ * The xa lock pins the vm entry during the lookup; the kref is taken under
+ * that lock so the context outlives concurrent teardown.  NOTE(review):
+ * this presumes vm->svm stays valid while the vm is still in the xarray —
+ * verify the ordering against amdgpu_svm_fini()/PASID removal.
+ *
+ * Returns: referenced SVM context, or NULL if the PASID has none.
+ */
+static struct amdgpu_svm *
+amdgpu_svm_lookup_by_pasid(struct amdgpu_device *adev, uint32_t pasid)
+{
+ struct amdgpu_svm *svm = NULL;
+ struct amdgpu_vm *vm;
+ unsigned long irqflags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm && vm->svm) {
+ svm = vm->svm;
+ kref_get(&svm->refcount);
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+
+ return svm;
+}
+
int amdgpu_svm_cache_init(void)
{
int ret = 0;
@@ -121,6 +139,33 @@ void amdgpu_svm_cache_fini(void)
AMDGPU_SVM_KMEM_CACHE_DESTROY(amdgpu_svm_range_cache);
}
+/*
+ * Apply @nattr attributes to the VA range [@start, @start + @size) via the
+ * attribute tree.
+ *
+ * Returns: 0 on success or a negative error code from the attr tree.
+ */
+static int amdgpu_svm_set_attr(struct amdgpu_vm *vm,
+ uint64_t start,
+ uint64_t size,
+ uint32_t nattr,
+ const struct drm_amdgpu_svm_attribute *attrs)
+{
+ struct amdgpu_svm *svm = vm->svm;
+
+ /* drm_gpusvm_range_find_or_insert() takes the mmap read lock, so we
+ * cannot hold the mmap lock for the whole ioctl; flush pending range
+ * work here to reduce the probability of failure instead.
+ */
+ amdgpu_svm_range_sync_work(svm);
+
+ return amdgpu_svm_attr_set(svm->attr_tree, start, size, nattr,
+ attrs);
+}
+
+/*
+ * Query @nattr attributes for the VA range [@start, @start + @size); results
+ * are written back into @attrs for the caller to copy out to userspace.
+ *
+ * Returns: 0 on success or a negative error code from the attr tree.
+ */
+static int amdgpu_svm_get_attr(struct amdgpu_vm *vm,
+ uint64_t start,
+ uint64_t size,
+ uint32_t nattr,
+ struct drm_amdgpu_svm_attribute *attrs)
+{
+ return amdgpu_svm_attr_get(vm->svm->attr_tree, start, size, nattr, attrs);
+}
+
static bool amdgpu_svm_default_xnack_enabled(struct amdgpu_device *adev)
{
uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
@@ -262,9 +307,124 @@ void amdgpu_svm_fini(struct amdgpu_vm *vm)
amdgpu_svm_put(svm);
}
+/*
+ * GPU page-fault entry point: resolve @fault_addr for @pasid by mapping the
+ * faulting page through the attribute tree under the SVM write lock.
+ *
+ * Returns: 0 when the fault was handled; -EOPNOTSUPP when no SVM context
+ * exists or XNACK is off (caller falls back to other handlers); -EAGAIN
+ * while the context is being torn down; or a mapping error.
+ *
+ * NOTE(review): amdgpu_svm_init_with_ops() currently forces
+ * xnack_enabled = false (WA/POC), so as posted this path always returns
+ * -EOPNOTSUPP.
+ */
+int amdgpu_svm_handle_fault(struct amdgpu_device *adev, uint32_t pasid,
+ uint64_t fault_addr, bool write_fault)
+{
+ struct amdgpu_svm *svm;
+ unsigned long fault_page;
+ int ret;
+
+ AMDGPU_SVM_TRACE("handle_fault enter: pasid=%u addr=0x%llx write=%d\n",
+ pasid, fault_addr, write_fault ? 1 : 0);
+
+ /* Takes a reference; released at the end of this function. */
+ svm = amdgpu_svm_lookup_by_pasid(adev, pasid);
+ if (!svm) {
+ AMDGPU_SVM_TRACE("handle_fault: pasid %u lookup failed\n", pasid);
+ return -EOPNOTSUPP;
+ }
+
+ AMDGPU_SVM_TRACE("handle_fault: pasid %u svm=%p exiting=%d xnack=%d\n",
+ pasid, svm, atomic_read(&svm->exiting),
+ svm->xnack_enabled ? 1 : 0);
+
+ if (atomic_read(&svm->exiting)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (!svm->xnack_enabled) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ fault_page = fault_addr >> PAGE_SHIFT;
+ AMDGPU_SVM_TRACE("handle_fault: map_attr page=0x%lx\n", fault_page);
+
+ /* Single-page map request: start == end in page units. */
+ down_write(&svm->svm_lock);
+ ret = amdgpu_svm_range_map_attr_ranges(svm, fault_page, fault_page);
+ up_write(&svm->svm_lock);
+
+ if (ret)
+ AMDGPU_SVM_TRACE("fault map failed: ret=%d addr=0x%llx write=%d\n",
+ ret, fault_addr, write_fault ? 1 : 0);
+ else
+ AMDGPU_SVM_TRACE("fault map success: addr=0x%llx write=%d\n",
+ fault_addr, write_fault ? 1 : 0);
+
+out:
+ AMDGPU_SVM_TRACE("handle_fault exit: pasid=%u addr=0x%llx ret=%d\n",
+ pasid, fault_addr, ret);
+ amdgpu_svm_put(svm);
+ return ret;
+}
+
bool amdgpu_svm_is_enabled(struct amdgpu_vm *vm)
{
return vm->svm != NULL;
}
+/*
+ * Copy the user-supplied attribute array into kernel memory.
+ *
+ * On success *@attrs points at a kernel copy the caller must kvfree() and
+ * *@size is its byte length.  nattr is bounded by AMDGPU_SVM_MAX_ATTRS (64),
+ * so the size multiplication cannot overflow.  On memdup_user() failure
+ * *@attrs holds an ERR_PTR and the caller must not free it.
+ *
+ * Returns: 0 on success, -EINVAL on bad count/pointer, or the memdup_user()
+ * error.
+ */
+static int amdgpu_svm_copy_attrs(const struct drm_amdgpu_gem_svm *args,
+ struct drm_amdgpu_svm_attribute **attrs,
+ size_t *size)
+{
+ if (!args->nattr || args->nattr > AMDGPU_SVM_MAX_ATTRS)
+ return -EINVAL;
+ if (!args->attrs_ptr)
+ return -EINVAL;
+
+ *size = args->nattr * sizeof(**attrs);
+ *attrs = memdup_user(u64_to_user_ptr(args->attrs_ptr), *size);
+
+ return PTR_ERR_OR_ZERO(*attrs);
+}
+
+/*
+ * DRM_AMDGPU_GEM_SVM ioctl handler.
+ *
+ * Validates the userspace request (page alignment, non-zero and
+ * non-wrapping VA range, attribute count), copies the attribute array in,
+ * and dispatches to the SET_ATTR/GET_ATTR implementation.  For GET_ATTR
+ * the filled-in attributes are copied back out to userspace.
+ *
+ * Returns: 0 on success or a negative error code.
+ */
+int amdgpu_gem_svm_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct drm_amdgpu_gem_svm *args = data;
+ struct drm_amdgpu_svm_attribute *attrs = NULL;
+ struct amdgpu_vm *vm;
+ size_t attrs_size = 0;
+ int ret = 0;
+
+ AMDGPU_SVM_TRACE("ioctl op=%u va:[0x%llx-0x%llx)-0x%llx nattr=%u\n",
+ args->operation, args->start_addr, args->start_addr + args->size,
+ args->size, args->nattr);
+
+ vm = &fpriv->vm;
+ if (!amdgpu_svm_is_enabled(vm))
+ return -EOPNOTSUPP;
+
+ if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+ return -EINVAL;
+
+ if (!args->start_addr || !args->size)
+ return -EINVAL;
+
+ /* Reject untrusted ranges whose end wraps the 64-bit address space. */
+ if (args->start_addr + args->size < args->start_addr)
+ return -EINVAL;
+
+ ret = amdgpu_svm_copy_attrs(args, &attrs, &attrs_size);
+ if (ret)
+ return ret;
+
+ switch (args->operation) {
+ case AMDGPU_SVM_OP_SET_ATTR:
+ ret = amdgpu_svm_set_attr(vm, args->start_addr, args->size,
+ args->nattr, attrs);
+ break;
+ case AMDGPU_SVM_OP_GET_ATTR:
+ ret = amdgpu_svm_get_attr(vm, args->start_addr, args->size,
+ args->nattr, attrs);
+ if (!ret && copy_to_user(u64_to_user_ptr(args->attrs_ptr),
+ attrs, attrs_size))
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ kvfree(attrs);
+ return ret;
+}
+
#endif /* CONFIG_DRM_AMDGPU_SVM */
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [RFC/POC PATCH 12/12] drm/amdgpu: wire up SVM build system and fault handler
2026-04-20 12:07 [RFC V2 00/12] drm/amdgpu: SVM implementation based on drm_gpusvm Honglei Huang
` (10 preceding siblings ...)
2026-04-20 12:07 ` [RFC/POC PATCH 11/12] drm/amdgpu: implement SVM ioctl and fault handler Honglei Huang
@ 2026-04-20 12:07 ` Honglei Huang
11 siblings, 0 replies; 37+ messages in thread
From: Honglei Huang @ 2026-04-20 12:07 UTC (permalink / raw)
To: Alexander.Deucher, Felix.Kuehling, Christian.Koenig, Oak.Zeng,
Jenny-Jing.Liu, Philip.Yang, Xiaogang.Chen, Ray.Huang,
Lingshan.Zhu, Junhua.Shen, matthew.brost, rodrigo.vivi,
thomas.hellstrom, dakr, aliceryhl
Cc: amd-gfx, dri-devel, honghuan
From: Honglei Huang <honghuan@amd.com>
Enable SVM compilation and integrate it into the VM subsystem:
Kconfig:
- Add CONFIG_DRM_AMDGPU_SVM option (depends on DRM_AMDGPU and
DEVICE_PRIVATE, selects DRM_GPUSVM, HMM_MIRROR, MMU_NOTIFIER)
Makefile:
- Build amdgpu_svm.o, amdgpu_svm_attr.o, amdgpu_svm_range.o when
CONFIG_DRM_AMDGPU_SVM is enabled
amdgpu_drv.c:
- Register DRM_IOCTL_AMDGPU_GEM_SVM in the ioctl table
amdgpu_vm.c:
- Initialize vm->svm = NULL in amdgpu_vm_init
- Call amdgpu_svm_init in amdgpu_vm_make_compute for compute VMs
- Call amdgpu_svm_close + amdgpu_svm_fini in amdgpu_vm_fini
- Integrate SVM fault handling in amdgpu_vm_handle_fault
Signed-off-by: Honglei Huang <honghuan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 11 +++++++
drivers/gpu/drm/amd/amdgpu/Makefile | 13 ++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 ++++++++++++++++++++++---
4 files changed, 62 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 1acfed2f9..22f679b85 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -74,6 +74,17 @@ config DRM_AMDGPU_USERPTR
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
+config DRM_AMDGPU_SVM
+ bool "Enable AMDGPU SVM support (experimental)"
+ depends on DRM_AMDGPU
+ depends on DEVICE_PRIVATE
+ select DRM_GPUSVM
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ # Experimental features must not be enabled by default.
+ default n
+ help
+ Experimental SVM (shared virtual memory) support based on the
+ DRM GPUSVM framework.
+
+ If unsure, say N.
+
config DRM_AMD_ISP
bool "Enable AMD Image Signal Processor IP support"
depends on DRM_AMDGPU && ACPI
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 64e7acff8..6507d9a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -43,6 +43,10 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
+# NOTE(review): KBUILD_EXTRA_SYMBOLS is only meaningful for out-of-tree
+# module builds; in-tree symbol resolution is handled by kbuild itself, so
+# no Module.symvers wiring is needed here.
+
amdgpu-y := amdgpu_drv.o
# add KMS driver
@@ -303,6 +307,15 @@ amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
+# svm support (object cleanup is handled by kbuild automatically; a custom
+# phony clean target is never invoked by the build system and is not needed)
+amdgpu-$(CONFIG_DRM_AMDGPU_SVM) += amdgpu_svm.o amdgpu_svm_attr.o \
+ amdgpu_svm_range.o
+
include $(FULL_AMD_PATH)/pm/Makefile
amdgpu-y += $(AMD_POWERPLAY_FILES)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7333e1929..12b587f9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -50,6 +50,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amdgpu_sched.h"
+#include "amdgpu_svm.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_userq.h"
#include "amdgpu_userq_fence.h"
@@ -3068,6 +3069,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_SVM, amdgpu_gem_svm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 676e24fb8..f64392117 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -43,6 +43,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_res_cursor.h"
+#include "amdgpu_svm.h"
#include "kfd_svm.h"
/**
@@ -2564,6 +2565,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int r, i;
vm->va = RB_ROOT_CACHED;
+ vm->svm = NULL;
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
@@ -2722,6 +2724,10 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
+ r = amdgpu_svm_init(adev, vm);
+ if (r)
+ goto unreserve_bo;
+
unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;
@@ -2754,6 +2760,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
unsigned long flags;
int i;
+ amdgpu_svm_close(vm);
+ amdgpu_svm_fini(vm);
+
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
root = amdgpu_bo_ref(vm->root.bo);
@@ -2939,8 +2948,10 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
bool write_fault)
{
bool is_compute_context = false;
+ bool has_svm = false;
struct amdgpu_bo *root;
unsigned long irqflags;
+ uint64_t fault_addr = addr;
uint64_t value, flags;
struct amdgpu_vm *vm;
int r;
@@ -2950,6 +2961,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (vm) {
root = amdgpu_bo_ref(vm->root.bo);
is_compute_context = vm->is_compute_context;
+ has_svm = !!vm->svm;
} else {
root = NULL;
}
@@ -2960,12 +2972,30 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
- if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr, ts, write_fault)) {
- amdgpu_bo_unref(&root);
- return true;
+ pr_debug("vm_handle_fault: pasid=%u addr=0x%llx compute=%d has_svm=%d write=%d\n",
+ pasid, fault_addr, is_compute_context, has_svm, write_fault);
+
+ if (is_compute_context && has_svm) {
+ r = amdgpu_svm_handle_fault(adev, pasid, fault_addr, write_fault);
+ pr_debug("vm_handle_fault: svm_handle_fault returned %d\n", r);
+ if (!r) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
}
+ if (is_compute_context && !has_svm) {
+ r = svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, ts, write_fault);
+ pr_debug("vm_handle_fault: kfd svm_range_restore_pages returned %d\n", r);
+ if (!r) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
+ }
+
+ pr_debug("vm_handle_fault: SVM paths exhausted, falling through to NORETRY path\n");
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@@ -3020,6 +3050,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
error_unref:
amdgpu_bo_unref(&root);
+ pr_debug("vm_handle_fault: returning false (unhandled) pasid=%u addr=0x%llx\n",
+ pasid, fault_addr);
return false;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 37+ messages in thread