* [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
@ 2025-10-25 12:03 ` Thomas Hellström
2025-10-29 0:31 ` Matthew Brost
2025-10-29 1:11 ` Matthew Brost
2025-10-25 12:03 ` [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd Thomas Hellström
` (13 subsequent siblings)
14 siblings, 2 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:03 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
With the end goal of being able to free unused pagemaps
and allocate them on demand, add a refcount to struct drm_pagemap,
remove the xe embedded drm_pagemap, allocating and freeing it
explicitly.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 51 ++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_svm.c | 26 ++++++++++-----
drivers/gpu/drm/xe/xe_vram_types.h | 2 +-
include/drm/drm_pagemap.h | 36 +++++++++++++++++++++
4 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 22c44807e3fe..4b8692f0b2a2 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -538,6 +538,57 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
return -ENOMEM;
}
+static void drm_pagemap_release(struct kref *ref)
+{
+ struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
+
+ kfree(dpagemap);
+}
+
+/**
+ * drm_pagemap_create() - Create a struct drm_pagemap.
+ * @dev: Pointer to a struct device providing the device-private memory.
+ * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
+ * @ops: Pointer to the struct drm_pagemap_ops.
+ *
+ * Allocate and initialize a struct drm_pagemap.
+ *
+ * Return: A refcounted pointer to a struct drm_pagemap on success.
+ * Error pointer on error.
+ */
+struct drm_pagemap *
+drm_pagemap_create(struct device *dev,
+ struct dev_pagemap *pagemap,
+ const struct drm_pagemap_ops *ops)
+{
+ struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
+
+ if (!dpagemap)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&dpagemap->ref);
+ dpagemap->dev = dev;
+ dpagemap->ops = ops;
+ dpagemap->pagemap = pagemap;
+
+ return dpagemap;
+}
+EXPORT_SYMBOL(drm_pagemap_create);
+
+/**
+ * drm_pagemap_put() - Put a struct drm_pagemap reference
+ * @dpagemap: Pointer to a struct drm_pagemap object.
+ *
+ * Puts a struct drm_pagemap reference and frees the drm_pagemap object
+ * if the refcount reaches zero.
+ */
+void drm_pagemap_put(struct drm_pagemap *dpagemap)
+{
+ if (likely(dpagemap))
+ kref_put(&dpagemap->ref, drm_pagemap_release);
+}
+EXPORT_SYMBOL(drm_pagemap_put);
+
/**
* drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
* @devmem_allocation: Pointer to the device memory allocation
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 129e7818565c..6d2c6c144315 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -861,7 +861,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct mm_struct *mm,
unsigned long timeslice_ms)
{
- struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
+ struct xe_vram_region *vr = container_of(dpagemap->pagemap, typeof(*vr), pagemap);
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
struct drm_buddy_block *block;
@@ -1372,7 +1372,7 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
{
- return &tile->mem.vram->dpagemap;
+ return tile->mem.vram->dpagemap;
}
/**
@@ -1482,6 +1482,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return ret;
}
+ vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
+ &xe_drm_pagemap_ops);
+ if (IS_ERR(vr->dpagemap)) {
+ drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
+ tile->id, vr->dpagemap);
+ ret = PTR_ERR(vr->dpagemap);
+ goto out_no_dpagemap;
+ }
+
vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
vr->pagemap.range.start = res->start;
vr->pagemap.range.end = res->end;
@@ -1489,22 +1498,23 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
vr->pagemap.owner = xe_svm_devm_owner(xe);
addr = devm_memremap_pages(dev, &vr->pagemap);
-
- vr->dpagemap.dev = dev;
- vr->dpagemap.ops = &xe_drm_pagemap_ops;
-
if (IS_ERR(addr)) {
- devm_release_mem_region(dev, res->start, resource_size(res));
ret = PTR_ERR(addr);
drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
tile->id, ERR_PTR(ret));
- return ret;
+ goto out_failed_memremap;
}
vr->hpa_base = res->start;
drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
return 0;
+
+out_failed_memremap:
+ drm_pagemap_put(vr->dpagemap);
+out_no_dpagemap:
+ devm_release_mem_region(dev, res->start, resource_size(res));
+ return ret;
}
#else
int xe_svm_alloc_vram(struct xe_tile *tile,
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
index 83772dcbf1af..c0d2c5ee8c10 100644
--- a/drivers/gpu/drm/xe/xe_vram_types.h
+++ b/drivers/gpu/drm/xe/xe_vram_types.h
@@ -72,7 +72,7 @@ struct xe_vram_region {
* @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
* pages of this tile.
*/
- struct drm_pagemap dpagemap;
+ struct drm_pagemap *dpagemap;
/**
* @hpa_base: base host physical address
*
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index f6e7e234c089..2c7de928865b 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -129,11 +129,15 @@ struct drm_pagemap_ops {
* struct drm_pagemap: Additional information for a struct dev_pagemap
* used for device p2p handshaking.
* @ops: The struct drm_pagemap_ops.
+ * @ref: Reference count.
* @dev: The struct drevice owning the device-private memory.
+ * @pagemap: Pointer to the underlying dev_pagemap.
*/
struct drm_pagemap {
const struct drm_pagemap_ops *ops;
+ struct kref ref;
struct device *dev;
+ struct dev_pagemap *pagemap;
};
struct drm_pagemap_devmem;
@@ -202,6 +206,37 @@ struct drm_pagemap_devmem_ops {
unsigned long npages);
};
+struct drm_pagemap *drm_pagemap_create(struct device *dev,
+ struct dev_pagemap *pagemap,
+ const struct drm_pagemap_ops *ops);
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+
+void drm_pagemap_put(struct drm_pagemap *dpagemap);
+
+#else
+
+static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_DRM_GPUSVM) */
+
+/**
+ * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
+ * @dpagemap: Pointer to the struct drm_pagemap.
+ *
+ * Return: Pointer to the struct drm_pagemap.
+ */
+static inline struct drm_pagemap *
+drm_pagemap_get(struct drm_pagemap *dpagemap)
+{
+ if (likely(dpagemap))
+ kref_get(&dpagemap->ref);
+
+ return dpagemap;
+}
+
/**
* struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
*
@@ -246,3 +281,4 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
unsigned long timeslice_ms);
#endif
+
--
2.51.0
* Re: [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap
2025-10-25 12:03 ` [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
@ 2025-10-29 0:31 ` Matthew Brost
2025-10-29 1:11 ` Matthew Brost
1 sibling, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 0:31 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:03:58PM +0200, Thomas Hellström wrote:
> With the end goal of being able to free unused pagemaps
> and allocate them on demand, add a refcount to struct drm_pagemap,
> remove the xe embedded drm_pagemap, allocating and freeing it
> explicitly.
>
I think technically we can leak a drm_pagemap in this patch by itself,
but since the refcounting scheme is refined later in the series, it's
likely not worth adding a drmm/devm handler to drop this reference.
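If we ever did want that safety net here, a devm action along the lines
of what patch 3 adds later in the series would be enough (sketch only,
mirroring that later hunk):

	static void xe_devm_release(void *data)
	{
		struct xe_vram_region *vr = data;

		drm_pagemap_put(vr->dpagemap);
		vr->dpagemap = NULL;
	}

	/* In xe_devm_add(), right after drm_pagemap_create() succeeds: */
	ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
	if (ret)
		goto out_no_dpagemap;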
Anyways, the patch itself looks good to me:
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/drm_pagemap.c | 51 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_svm.c | 26 ++++++++++-----
> drivers/gpu/drm/xe/xe_vram_types.h | 2 +-
> include/drm/drm_pagemap.h | 36 +++++++++++++++++++++
> 4 files changed, 106 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 22c44807e3fe..4b8692f0b2a2 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -538,6 +538,57 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> return -ENOMEM;
> }
>
> +static void drm_pagemap_release(struct kref *ref)
> +{
> + struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
> +
> + kfree(dpagemap);
> +}
> +
> +/**
> + * drm_pagemap_create() - Create a struct drm_pagemap.
> + * @dev: Pointer to a struct device providing the device-private memory.
> + * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
> + * @ops: Pointer to the struct drm_pagemap_ops.
> + *
> + * Allocate and initialize a struct drm_pagemap.
> + *
> + * Return: A refcounted pointer to a struct drm_pagemap on success.
> + * Error pointer on error.
> + */
> +struct drm_pagemap *
> +drm_pagemap_create(struct device *dev,
> + struct dev_pagemap *pagemap,
> + const struct drm_pagemap_ops *ops)
> +{
> + struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> +
> + if (!dpagemap)
> + return ERR_PTR(-ENOMEM);
> +
> + kref_init(&dpagemap->ref);
> + dpagemap->dev = dev;
> + dpagemap->ops = ops;
> + dpagemap->pagemap = pagemap;
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_create);
> +
> +/**
> + * drm_pagemap_put() - Put a struct drm_pagemap reference
> + * @dpagemap: Pointer to a struct drm_pagemap object.
> + *
> + * Puts a struct drm_pagemap reference and frees the drm_pagemap object
> + * if the refount reaches zero.
> + */
> +void drm_pagemap_put(struct drm_pagemap *dpagemap)
> +{
> + if (likely(dpagemap))
> + kref_put(&dpagemap->ref, drm_pagemap_release);
> +}
> +EXPORT_SYMBOL(drm_pagemap_put);
> +
> /**
> * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
> * @devmem_allocation: Pointer to the device memory allocation
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 129e7818565c..6d2c6c144315 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -861,7 +861,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms)
> {
> - struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
> + struct xe_vram_region *vr = container_of(dpagemap->pagemap, typeof(*vr), pagemap);
> struct xe_device *xe = vr->xe;
> struct device *dev = xe->drm.dev;
> struct drm_buddy_block *block;
> @@ -1372,7 +1372,7 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
>
> static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
> {
> - return &tile->mem.vram->dpagemap;
> + return tile->mem.vram->dpagemap;
> }
>
> /**
> @@ -1482,6 +1482,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> return ret;
> }
>
> + vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> + &xe_drm_pagemap_ops);
> + if (IS_ERR(vr->dpagemap)) {
> + drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
> + tile->id, vr->dpagemap);
> + ret = PTR_ERR(vr->dpagemap);
> + goto out_no_dpagemap;
> + }
> +
> vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> vr->pagemap.range.start = res->start;
> vr->pagemap.range.end = res->end;
> @@ -1489,22 +1498,23 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
> vr->pagemap.owner = xe_svm_devm_owner(xe);
> addr = devm_memremap_pages(dev, &vr->pagemap);
> -
> - vr->dpagemap.dev = dev;
> - vr->dpagemap.ops = &xe_drm_pagemap_ops;
> -
> if (IS_ERR(addr)) {
> - devm_release_mem_region(dev, res->start, resource_size(res));
> ret = PTR_ERR(addr);
> drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
> tile->id, ERR_PTR(ret));
> - return ret;
> + goto out_failed_memremap;
> }
> vr->hpa_base = res->start;
>
> drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
> tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
> return 0;
> +
> +out_failed_memremap:
> + drm_pagemap_put(vr->dpagemap);
> +out_no_dpagemap:
> + devm_release_mem_region(dev, res->start, resource_size(res));
> + return ret;
> }
> #else
> int xe_svm_alloc_vram(struct xe_tile *tile,
> diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
> index 83772dcbf1af..c0d2c5ee8c10 100644
> --- a/drivers/gpu/drm/xe/xe_vram_types.h
> +++ b/drivers/gpu/drm/xe/xe_vram_types.h
> @@ -72,7 +72,7 @@ struct xe_vram_region {
> * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
> * pages of this tile.
> */
> - struct drm_pagemap dpagemap;
> + struct drm_pagemap *dpagemap;
> /**
> * @hpa_base: base host physical address
> *
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index f6e7e234c089..2c7de928865b 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -129,11 +129,15 @@ struct drm_pagemap_ops {
> * struct drm_pagemap: Additional information for a struct dev_pagemap
> * used for device p2p handshaking.
> * @ops: The struct drm_pagemap_ops.
> + * @ref: Reference count.
> * @dev: The struct drevice owning the device-private memory.
> + * @pagemap: Pointer to the underlying dev_pagemap.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> + struct kref ref;
> struct device *dev;
> + struct dev_pagemap *pagemap;
> };
>
> struct drm_pagemap_devmem;
> @@ -202,6 +206,37 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> +struct drm_pagemap *drm_pagemap_create(struct device *dev,
> + struct dev_pagemap *pagemap,
> + const struct drm_pagemap_ops *ops);
> +
> +#if IS_ENABLED(CONFIG_DRM_GPUSVM)
> +
> +void drm_pagemap_put(struct drm_pagemap *dpagemap);
> +
> +#else
> +
> +static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
> +{
> +}
> +
> +#endif /* IS_ENABLED(CONFIG_DRM_GPUSVM) */
> +
> +/**
> + * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> + * @dpagemap: Pointer to the struct drm_pagemap.
> + *
> + * Return: Pointer to the struct drm_pagemap.
> + */
> +static inline struct drm_pagemap *
> +drm_pagemap_get(struct drm_pagemap *dpagemap)
> +{
> + if (likely(dpagemap))
> + kref_get(&dpagemap->ref);
> +
> + return dpagemap;
> +}
> +
> /**
> * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
> *
> @@ -246,3 +281,4 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> unsigned long timeslice_ms);
>
> #endif
> +
> --
> 2.51.0
>
* Re: [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap
2025-10-25 12:03 ` [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
2025-10-29 0:31 ` Matthew Brost
@ 2025-10-29 1:11 ` Matthew Brost
2025-10-29 14:51 ` Thomas Hellström
1 sibling, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 1:11 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:03:58PM +0200, Thomas Hellström wrote:
> With the end goal of being able to free unused pagemaps
> and allocate them on demand, add a refcount to struct drm_pagemap,
> remove the xe embedded drm_pagemap, allocating and freeing it
> explicitly.
>
General comment for the series — could we add some kernel documentation,
ideally in xe_svm.c, that explains the reference counting scheme used
for drm_pagemap?
For example:
- An SVM VM holds a drm_pagemap reference to local pagemaps.
- madvise VMAs hold a reference to the preferred location pagemap.
- Allocated device pages hold a reference to the pagemap.
- The pagemap itself holds a reference to the device/module.
Reference counting schemes can be difficult to reverse-engineer and easy
to forget, so it would be best to document them clearly.
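A kernel-doc DOC: block in xe_svm.c would probably be enough; a rough
sketch based on the rules above (wording illustrative, not final):

	/**
	 * DOC: drm_pagemap reference counting in xe
	 *
	 * - An SVM VM holds a drm_pagemap reference to its local pagemaps.
	 * - madvise VMAs hold a reference to the preferred-location pagemap.
	 * - Allocated device pages hold a reference to their pagemap.
	 * - The drm_pagemap itself holds a reference to the providing
	 *   device and module.
	 */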
Matt
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/drm_pagemap.c | 51 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_svm.c | 26 ++++++++++-----
> drivers/gpu/drm/xe/xe_vram_types.h | 2 +-
> include/drm/drm_pagemap.h | 36 +++++++++++++++++++++
> 4 files changed, 106 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 22c44807e3fe..4b8692f0b2a2 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -538,6 +538,57 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> return -ENOMEM;
> }
>
> +static void drm_pagemap_release(struct kref *ref)
> +{
> + struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
> +
> + kfree(dpagemap);
> +}
> +
> +/**
> + * drm_pagemap_create() - Create a struct drm_pagemap.
> + * @dev: Pointer to a struct device providing the device-private memory.
> + * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
> + * @ops: Pointer to the struct drm_pagemap_ops.
> + *
> + * Allocate and initialize a struct drm_pagemap.
> + *
> + * Return: A refcounted pointer to a struct drm_pagemap on success.
> + * Error pointer on error.
> + */
> +struct drm_pagemap *
> +drm_pagemap_create(struct device *dev,
> + struct dev_pagemap *pagemap,
> + const struct drm_pagemap_ops *ops)
> +{
> + struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> +
> + if (!dpagemap)
> + return ERR_PTR(-ENOMEM);
> +
> + kref_init(&dpagemap->ref);
> + dpagemap->dev = dev;
> + dpagemap->ops = ops;
> + dpagemap->pagemap = pagemap;
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_create);
> +
> +/**
> + * drm_pagemap_put() - Put a struct drm_pagemap reference
> + * @dpagemap: Pointer to a struct drm_pagemap object.
> + *
> + * Puts a struct drm_pagemap reference and frees the drm_pagemap object
> + * if the refount reaches zero.
> + */
> +void drm_pagemap_put(struct drm_pagemap *dpagemap)
> +{
> + if (likely(dpagemap))
> + kref_put(&dpagemap->ref, drm_pagemap_release);
> +}
> +EXPORT_SYMBOL(drm_pagemap_put);
> +
> /**
> * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
> * @devmem_allocation: Pointer to the device memory allocation
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 129e7818565c..6d2c6c144315 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -861,7 +861,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms)
> {
> - struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
> + struct xe_vram_region *vr = container_of(dpagemap->pagemap, typeof(*vr), pagemap);
> struct xe_device *xe = vr->xe;
> struct device *dev = xe->drm.dev;
> struct drm_buddy_block *block;
> @@ -1372,7 +1372,7 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
>
> static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
> {
> - return &tile->mem.vram->dpagemap;
> + return tile->mem.vram->dpagemap;
> }
>
> /**
> @@ -1482,6 +1482,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> return ret;
> }
>
> + vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> + &xe_drm_pagemap_ops);
> + if (IS_ERR(vr->dpagemap)) {
> + drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
> + tile->id, vr->dpagemap);
> + ret = PTR_ERR(vr->dpagemap);
> + goto out_no_dpagemap;
> + }
> +
> vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> vr->pagemap.range.start = res->start;
> vr->pagemap.range.end = res->end;
> @@ -1489,22 +1498,23 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
> vr->pagemap.owner = xe_svm_devm_owner(xe);
> addr = devm_memremap_pages(dev, &vr->pagemap);
> -
> - vr->dpagemap.dev = dev;
> - vr->dpagemap.ops = &xe_drm_pagemap_ops;
> -
> if (IS_ERR(addr)) {
> - devm_release_mem_region(dev, res->start, resource_size(res));
> ret = PTR_ERR(addr);
> drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
> tile->id, ERR_PTR(ret));
> - return ret;
> + goto out_failed_memremap;
> }
> vr->hpa_base = res->start;
>
> drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
> tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
> return 0;
> +
> +out_failed_memremap:
> + drm_pagemap_put(vr->dpagemap);
> +out_no_dpagemap:
> + devm_release_mem_region(dev, res->start, resource_size(res));
> + return ret;
> }
> #else
> int xe_svm_alloc_vram(struct xe_tile *tile,
> diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
> index 83772dcbf1af..c0d2c5ee8c10 100644
> --- a/drivers/gpu/drm/xe/xe_vram_types.h
> +++ b/drivers/gpu/drm/xe/xe_vram_types.h
> @@ -72,7 +72,7 @@ struct xe_vram_region {
> * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
> * pages of this tile.
> */
> - struct drm_pagemap dpagemap;
> + struct drm_pagemap *dpagemap;
> /**
> * @hpa_base: base host physical address
> *
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index f6e7e234c089..2c7de928865b 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -129,11 +129,15 @@ struct drm_pagemap_ops {
> * struct drm_pagemap: Additional information for a struct dev_pagemap
> * used for device p2p handshaking.
> * @ops: The struct drm_pagemap_ops.
> + * @ref: Reference count.
> * @dev: The struct drevice owning the device-private memory.
> + * @pagemap: Pointer to the underlying dev_pagemap.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> + struct kref ref;
> struct device *dev;
> + struct dev_pagemap *pagemap;
> };
>
> struct drm_pagemap_devmem;
> @@ -202,6 +206,37 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> +struct drm_pagemap *drm_pagemap_create(struct device *dev,
> + struct dev_pagemap *pagemap,
> + const struct drm_pagemap_ops *ops);
> +
> +#if IS_ENABLED(CONFIG_DRM_GPUSVM)
> +
> +void drm_pagemap_put(struct drm_pagemap *dpagemap);
> +
> +#else
> +
> +static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
> +{
> +}
> +
> +#endif /* IS_ENABLED(CONFIG_DRM_GPUSVM) */
> +
> +/**
> + * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> + * @dpagemap: Pointer to the struct drm_pagemap.
> + *
> + * Return: Pointer to the struct drm_pagemap.
> + */
> +static inline struct drm_pagemap *
> +drm_pagemap_get(struct drm_pagemap *dpagemap)
> +{
> + if (likely(dpagemap))
> + kref_get(&dpagemap->ref);
> +
> + return dpagemap;
> +}
> +
> /**
> * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
> *
> @@ -246,3 +281,4 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> unsigned long timeslice_ms);
>
> #endif
> +
> --
> 2.51.0
>
* Re: [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap
2025-10-29 1:11 ` Matthew Brost
@ 2025-10-29 14:51 ` Thomas Hellström
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-29 14:51 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 18:11 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:03:58PM +0200, Thomas Hellström wrote:
> > With the end goal of being able to free unused pagemaps
> > and allocate them on demand, add a refcount to struct drm_pagemap,
> > remove the xe embedded drm_pagemap, allocating and freeing it
> > explicitly.
> >
>
> General commit for the series — could we add some kernel
> documentation,
> ideally in xe_svm.c, that explains the reference counting scheme used
> for drm_pagemap?
>
> For example:
>
> - An SVM VM holds a drm_pagemap reference to local pagemaps.
> - madvise VMAs hold a reference to the preferred location pagemap.
> - Allocated device pages hold a reference to the pagemap.
> - The pagemap itself holds a reference to the device/module.
>
> Reference counting schemes can be difficult to reverse-engineer and
> easy
> to forget, so it would be best to document them clearly.
Sure. Good idea.
/Thomas
>
> Matt
>
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/drm_pagemap.c | 51
> > ++++++++++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_svm.c | 26 ++++++++++-----
> > drivers/gpu/drm/xe/xe_vram_types.h | 2 +-
> > include/drm/drm_pagemap.h | 36 +++++++++++++++++++++
> > 4 files changed, 106 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > b/drivers/gpu/drm/drm_pagemap.c
> > index 22c44807e3fe..4b8692f0b2a2 100644
> > --- a/drivers/gpu/drm/drm_pagemap.c
> > +++ b/drivers/gpu/drm/drm_pagemap.c
> > @@ -538,6 +538,57 @@ static int
> > drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> > return -ENOMEM;
> > }
> >
> > +static void drm_pagemap_release(struct kref *ref)
> > +{
> > + struct drm_pagemap *dpagemap = container_of(ref,
> > typeof(*dpagemap), ref);
> > +
> > + kfree(dpagemap);
> > +}
> > +
> > +/**
> > + * drm_pagemap_create() - Create a struct drm_pagemap.
> > + * @dev: Pointer to a struct device providing the device-private
> > memory.
> > + * @pagemap: Pointer to a pre-setup struct dev_pagemap providing
> > the struct pages.
> > + * @ops: Pointer to the struct drm_pagemap_ops.
> > + *
> > + * Allocate and initialize a struct drm_pagemap.
> > + *
> > + * Return: A refcounted pointer to a struct drm_pagemap on
> > success.
> > + * Error pointer on error.
> > + */
> > +struct drm_pagemap *
> > +drm_pagemap_create(struct device *dev,
> > + struct dev_pagemap *pagemap,
> > + const struct drm_pagemap_ops *ops)
> > +{
> > + struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap),
> > GFP_KERNEL);
> > +
> > + if (!dpagemap)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + kref_init(&dpagemap->ref);
> > + dpagemap->dev = dev;
> > + dpagemap->ops = ops;
> > + dpagemap->pagemap = pagemap;
> > +
> > + return dpagemap;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_create);
> > +
> > +/**
> > + * drm_pagemap_put() - Put a struct drm_pagemap reference
> > + * @dpagemap: Pointer to a struct drm_pagemap object.
> > + *
> > + * Puts a struct drm_pagemap reference and frees the drm_pagemap
> > object
> > + * if the refount reaches zero.
> > + */
> > +void drm_pagemap_put(struct drm_pagemap *dpagemap)
> > +{
> > + if (likely(dpagemap))
> > + kref_put(&dpagemap->ref, drm_pagemap_release);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_put);
> > +
> > /**
> > * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
> > * @devmem_allocation: Pointer to the device memory allocation
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index 129e7818565c..6d2c6c144315 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -861,7 +861,7 @@ static int xe_drm_pagemap_populate_mm(struct
> > drm_pagemap *dpagemap,
> > struct mm_struct *mm,
> > unsigned long timeslice_ms)
> > {
> > - struct xe_vram_region *vr = container_of(dpagemap,
> > typeof(*vr), dpagemap);
> > + struct xe_vram_region *vr = container_of(dpagemap-
> > >pagemap, typeof(*vr), pagemap);
> > struct xe_device *xe = vr->xe;
> > struct device *dev = xe->drm.dev;
> > struct drm_buddy_block *block;
> > @@ -1372,7 +1372,7 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct
> > xe_vm *vm, u64 start, u64 end)
> >
> > static struct drm_pagemap *tile_local_pagemap(struct xe_tile
> > *tile)
> > {
> > - return &tile->mem.vram->dpagemap;
> > + return tile->mem.vram->dpagemap;
> > }
> >
> > /**
> > @@ -1482,6 +1482,15 @@ int xe_devm_add(struct xe_tile *tile, struct
> > xe_vram_region *vr)
> > return ret;
> > }
> >
> > + vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> > + &xe_drm_pagemap_ops);
> > + if (IS_ERR(vr->dpagemap)) {
> > + drm_err(&xe->drm, "Failed to create drm_pagemap
> > tile %d memory: %pe\n",
> > + tile->id, vr->dpagemap);
> > + ret = PTR_ERR(vr->dpagemap);
> > + goto out_no_dpagemap;
> > + }
> > +
> > vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> > vr->pagemap.range.start = res->start;
> > vr->pagemap.range.end = res->end;
> > @@ -1489,22 +1498,23 @@ int xe_devm_add(struct xe_tile *tile,
> > struct xe_vram_region *vr)
> > vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
> > vr->pagemap.owner = xe_svm_devm_owner(xe);
> > addr = devm_memremap_pages(dev, &vr->pagemap);
> > -
> > - vr->dpagemap.dev = dev;
> > - vr->dpagemap.ops = &xe_drm_pagemap_ops;
> > -
> > if (IS_ERR(addr)) {
> > - devm_release_mem_region(dev, res->start,
> > resource_size(res));
> > ret = PTR_ERR(addr);
> > drm_err(&xe->drm, "Failed to remap tile %d memory,
> > errno %pe\n",
> > tile->id, ERR_PTR(ret));
> > - return ret;
> > + goto out_failed_memremap;
> > }
> > vr->hpa_base = res->start;
> >
> > drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to
> > devm, remapped to %pr\n",
> > tile->id, vr->io_start, vr->io_start + vr-
> > >usable_size, res);
> > return 0;
> > +
> > +out_failed_memremap:
> > + drm_pagemap_put(vr->dpagemap);
> > +out_no_dpagemap:
> > + devm_release_mem_region(dev, res->start,
> > resource_size(res));
> > + return ret;
> > }
> > #else
> > int xe_svm_alloc_vram(struct xe_tile *tile,
> > diff --git a/drivers/gpu/drm/xe/xe_vram_types.h
> > b/drivers/gpu/drm/xe/xe_vram_types.h
> > index 83772dcbf1af..c0d2c5ee8c10 100644
> > --- a/drivers/gpu/drm/xe/xe_vram_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vram_types.h
> > @@ -72,7 +72,7 @@ struct xe_vram_region {
> > * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE
> > memory
> > * pages of this tile.
> > */
> > - struct drm_pagemap dpagemap;
> > + struct drm_pagemap *dpagemap;
> > /**
> > * @hpa_base: base host physical address
> > *
> > diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> > index f6e7e234c089..2c7de928865b 100644
> > --- a/include/drm/drm_pagemap.h
> > +++ b/include/drm/drm_pagemap.h
> > @@ -129,11 +129,15 @@ struct drm_pagemap_ops {
> > * struct drm_pagemap: Additional information for a struct
> > dev_pagemap
> > * used for device p2p handshaking.
> > * @ops: The struct drm_pagemap_ops.
> > + * @ref: Reference count.
> > * @dev: The struct drevice owning the device-private memory.
> > + * @pagemap: Pointer to the underlying dev_pagemap.
> > */
> > struct drm_pagemap {
> > const struct drm_pagemap_ops *ops;
> > + struct kref ref;
> > struct device *dev;
> > + struct dev_pagemap *pagemap;
> > };
> >
> > struct drm_pagemap_devmem;
> > @@ -202,6 +206,37 @@ struct drm_pagemap_devmem_ops {
> > unsigned long npages);
> > };
> >
> > +struct drm_pagemap *drm_pagemap_create(struct device *dev,
> > + struct dev_pagemap
> > *pagemap,
> > + const struct
> > drm_pagemap_ops *ops);
> > +
> > +#if IS_ENABLED(CONFIG_DRM_GPUSVM)
> > +
> > +void drm_pagemap_put(struct drm_pagemap *dpagemap);
> > +
> > +#else
> > +
> > +static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
> > +{
> > +}
> > +
> > +#endif /* IS_ENABLED(CONFIG_DRM_GPUSVM) */
> > +
> > +/**
> > + * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> > + * @dpagemap: Pointer to the struct drm_pagemap.
> > + *
> > + * Return: Pointer to the struct drm_pagemap.
> > + */
> > +static inline struct drm_pagemap *
> > +drm_pagemap_get(struct drm_pagemap *dpagemap)
> > +{
> > + if (likely(dpagemap))
> > + kref_get(&dpagemap->ref);
> > +
> > + return dpagemap;
> > +}
> > +
> > /**
> > * struct drm_pagemap_devmem - Structure representing a GPU SVM
> > device memory allocation
> > *
> > @@ -246,3 +281,4 @@ int drm_pagemap_populate_mm(struct drm_pagemap
> > *dpagemap,
> > unsigned long timeslice_ms);
> >
> > #endif
> > +
> > --
> > 2.51.0
> >
* [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
2025-10-25 12:03 ` [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
@ 2025-10-25 12:03 ` Thomas Hellström
2025-10-29 0:33 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes Thomas Hellström
` (12 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:03 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
To be able to keep track of drm_pagemap usage, add a refcounted
backpointer to struct drm_pagemap_zdd. This will keep the drm_pagemap
reference count from dropping to zero as long as there are drm_pagemap
pages present in a CPU address space.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 4b8692f0b2a2..173b3ecb07d5 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -62,6 +62,7 @@
*
* @refcount: Reference count for the zdd
* @devmem_allocation: device memory allocation
+ * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap.
* @device_private_page_owner: Device private pages owner
*
* This structure serves as a generic wrapper installed in
@@ -74,11 +75,13 @@
struct drm_pagemap_zdd {
struct kref refcount;
struct drm_pagemap_devmem *devmem_allocation;
+ struct drm_pagemap *dpagemap;
void *device_private_page_owner;
};
/**
* drm_pagemap_zdd_alloc() - Allocate a zdd structure.
+ * @dpagemap: Pointer to the underlying struct drm_pagemap.
* @device_private_page_owner: Device private pages owner
*
* This function allocates and initializes a new zdd structure. It sets up the
@@ -87,7 +90,7 @@ struct drm_pagemap_zdd {
* Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
*/
static struct drm_pagemap_zdd *
-drm_pagemap_zdd_alloc(void *device_private_page_owner)
+drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap, void *device_private_page_owner)
{
struct drm_pagemap_zdd *zdd;
@@ -98,6 +101,7 @@ drm_pagemap_zdd_alloc(void *device_private_page_owner)
kref_init(&zdd->refcount);
zdd->devmem_allocation = NULL;
zdd->device_private_page_owner = device_private_page_owner;
+ zdd->dpagemap = drm_pagemap_get(dpagemap);
return zdd;
}
@@ -127,6 +131,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
struct drm_pagemap_zdd *zdd =
container_of(ref, struct drm_pagemap_zdd, refcount);
struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
+ struct drm_pagemap *dpagemap = zdd->dpagemap;
if (devmem) {
complete_all(&devmem->detached);
@@ -134,6 +139,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
devmem->ops->devmem_release(devmem);
}
kfree(zdd);
+ drm_pagemap_put(dpagemap);
}
/**
@@ -366,7 +372,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
- zdd = drm_pagemap_zdd_alloc(pgmap_owner);
+ zdd = drm_pagemap_zdd_alloc(devmem_allocation->dpagemap, pgmap_owner);
if (!zdd) {
err = -ENOMEM;
goto err_free;
--
2.51.0
* Re: [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd
2025-10-25 12:03 ` [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd Thomas Hellström
@ 2025-10-29 0:33 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 0:33 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:03:59PM +0200, Thomas Hellström wrote:
> To be able to keep track of drm_pagemap usage, add a refcounted
> backpointer to struct drm_pagemap_zdd. This will keep the drm_pagemap
> reference count from dropping to zero as long as there are drm_pagemap
> pages present in a CPU address space.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/drm_pagemap.c | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 4b8692f0b2a2..173b3ecb07d5 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -62,6 +62,7 @@
> *
> * @refcount: Reference count for the zdd
> * @devmem_allocation: device memory allocation
> + * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap.
> * @device_private_page_owner: Device private pages owner
> *
> * This structure serves as a generic wrapper installed in
> @@ -74,11 +75,13 @@
> struct drm_pagemap_zdd {
> struct kref refcount;
> struct drm_pagemap_devmem *devmem_allocation;
> + struct drm_pagemap *dpagemap;
> void *device_private_page_owner;
> };
>
> /**
> * drm_pagemap_zdd_alloc() - Allocate a zdd structure.
> + * @dpagemap: Pointer to the underlying struct drm_pagemap.
> * @device_private_page_owner: Device private pages owner
> *
> * This function allocates and initializes a new zdd structure. It sets up the
> @@ -87,7 +90,7 @@ struct drm_pagemap_zdd {
> * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
> */
> static struct drm_pagemap_zdd *
> -drm_pagemap_zdd_alloc(void *device_private_page_owner)
> +drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap, void *device_private_page_owner)
> {
> struct drm_pagemap_zdd *zdd;
>
> @@ -98,6 +101,7 @@ drm_pagemap_zdd_alloc(void *device_private_page_owner)
> kref_init(&zdd->refcount);
> zdd->devmem_allocation = NULL;
> zdd->device_private_page_owner = device_private_page_owner;
> + zdd->dpagemap = drm_pagemap_get(dpagemap);
>
> return zdd;
> }
> @@ -127,6 +131,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
> struct drm_pagemap_zdd *zdd =
> container_of(ref, struct drm_pagemap_zdd, refcount);
> struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
> + struct drm_pagemap *dpagemap = zdd->dpagemap;
>
> if (devmem) {
> complete_all(&devmem->detached);
> @@ -134,6 +139,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
> devmem->ops->devmem_release(devmem);
> }
> kfree(zdd);
> + drm_pagemap_put(dpagemap);
> }
>
> /**
> @@ -366,7 +372,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
> pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
> pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
>
> - zdd = drm_pagemap_zdd_alloc(pgmap_owner);
> + zdd = drm_pagemap_zdd_alloc(devmem_allocation->dpagemap, pgmap_owner);
> if (!zdd) {
> err = -ENOMEM;
> goto err_free;
> --
> 2.51.0
>
* [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
2025-10-25 12:03 ` [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
2025-10-25 12:03 ` [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-29 0:46 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
` (11 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Even if the drm_pagemap provider has released its reference on
the drm_pagemap, references may be held by still active pages.
Ensure that we hold a reference on the provider drm device and
modules for as long as we might need to use the drm_pagemap ops.
Note that in theory, the drm_gpusvm_helper module may be unloaded
as soon as the final module_put() of the provider driver module is
executed, so we need to add a module_exit() function that waits
until the work item executing the module_put() has completed.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 101 ++++++++++++++++++++++++++++++++--
drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
include/drm/drm_pagemap.h | 10 +++-
3 files changed, 117 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 173b3ecb07d5..fb18a80d6a1c 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
+#include <drm/drm_print.h>
/**
* DOC: Overview
@@ -544,16 +545,92 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
return -ENOMEM;
}
+static void drm_pagemap_dev_unhold_work(struct work_struct *work);
+static LLIST_HEAD(drm_pagemap_unhold_list);
+static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
+
+/**
+ * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
+ * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
+ * @drm: drm device to put.
+ *
+ * When a struct drm_pagemap is released, we also need to release the
+ * reference it holds on the drm device. However, typically that needs
+ * to be done separately from a system-wide workqueue.
+ * Each time a struct drm_pagemap is initialized
+ * (or re-initialized if cached) therefore allocate a separate
+ * drm_pagemap_dev_hold item, from which we put the drm device and
+ * associated module.
+ */
+struct drm_pagemap_dev_hold {
+ struct llist_node link;
+ struct drm_device *drm;
+};
+
static void drm_pagemap_release(struct kref *ref)
{
struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
-
+ struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
+
+ /*
+ * We know the pagemap provider is alive at this point, since
+ * the struct drm_pagemap_dev_hold holds a reference to the
+ * pagemap provider drm_device and its module.
+ */
+ dpagemap->dev_hold = NULL;
kfree(dpagemap);
+ llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
+ schedule_work(&drm_pagemap_work);
+ /*
+ * Here, either the provider device is still alive, since if called from
+ * page_free(), the caller is holding a reference on the dev_pagemap,
+ * or if called from drm_pagemap_put(), the direct caller is still alive.
+ * This ensures we can't race with THIS module unload.
+ */
+}
+
+static void drm_pagemap_dev_unhold_work(struct work_struct *work)
+{
+ struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
+ struct drm_pagemap_dev_hold *dev_hold, *next;
+
+ /*
+ * Deferred release of drm_pagemap provider device and module.
+ * THIS module is kept alive during the release by the
+ * flush_work() in the drm_pagemap_exit() function.
+ */
+ llist_for_each_entry_safe(dev_hold, next, node, link) {
+ struct drm_device *drm = dev_hold->drm;
+ struct module *module = drm->driver->fops->owner;
+
+ drm_dbg(drm, "Releasing reference on provider device and module.\n");
+ drm_dev_put(drm);
+ module_put(module);
+ kfree(dev_hold);
+ }
+}
+
+static struct drm_pagemap_dev_hold *
+drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_dev_hold *dev_hold;
+ struct drm_device *drm = dpagemap->drm;
+
+ dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
+ if (!dev_hold)
+ return ERR_PTR(-ENOMEM);
+
+ init_llist_node(&dev_hold->link);
+ dev_hold->drm = drm;
+ (void)try_module_get(drm->driver->fops->owner);
+ drm_dev_get(drm);
+
+ return dev_hold;
}
/**
* drm_pagemap_create() - Create a struct drm_pagemap.
- * @dev: Pointer to a struct device providing the device-private memory.
+ * @drm: Pointer to a struct drm_device providing the device-private memory.
* @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
* @ops: Pointer to the struct drm_pagemap_ops.
*
@@ -563,20 +640,28 @@ static void drm_pagemap_release(struct kref *ref)
* Error pointer on error.
*/
struct drm_pagemap *
-drm_pagemap_create(struct device *dev,
+drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops)
{
struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
+ struct drm_pagemap_dev_hold *dev_hold;
if (!dpagemap)
return ERR_PTR(-ENOMEM);
kref_init(&dpagemap->ref);
- dpagemap->dev = dev;
+ dpagemap->drm = drm;
dpagemap->ops = ops;
dpagemap->pagemap = pagemap;
+ dev_hold = drm_pagemap_dev_hold(dpagemap);
+ if (IS_ERR(dev_hold)) {
+ kfree(dpagemap);
+ return ERR_CAST(dev_hold);
+ }
+ dpagemap->dev_hold = dev_hold;
+
return dpagemap;
}
EXPORT_SYMBOL(drm_pagemap_create);
@@ -937,3 +1022,11 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);
+
+static void drm_pagemap_exit(void)
+{
+ flush_work(&drm_pagemap_work);
+ if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
+ disable_work_sync(&drm_pagemap_work);
+}
+module_exit(drm_pagemap_exit);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6d2c6c144315..f6ee22da2e95 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1437,7 +1437,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
unsigned int order,
enum dma_data_direction dir)
{
- struct device *pgmap_dev = dpagemap->dev;
+ struct device *pgmap_dev = dpagemap->drm->dev;
enum drm_interconnect_protocol prot;
dma_addr_t addr;
@@ -1457,6 +1457,14 @@ static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.populate_mm = xe_drm_pagemap_populate_mm,
};
+static void xe_devm_release(void *data)
+{
+ struct xe_vram_region *vr = data;
+
+ drm_pagemap_put(vr->dpagemap);
+ vr->dpagemap = NULL;
+}
+
/**
* xe_devm_add: Remap and provide memmap backing for device memory
* @tile: tile that the memory region belongs to
@@ -1482,7 +1490,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return ret;
}
- vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
+ vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
&xe_drm_pagemap_ops);
if (IS_ERR(vr->dpagemap)) {
drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
@@ -1490,6 +1498,9 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
ret = PTR_ERR(vr->dpagemap);
goto out_no_dpagemap;
}
+ ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
+ if (ret)
+ goto out_no_dpagemap;
vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
vr->pagemap.range.start = res->start;
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index 2c7de928865b..5cfe54331ba7 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -9,6 +9,7 @@
#define NR_PAGES(order) (1U << (order))
struct drm_pagemap;
+struct drm_pagemap_dev_hold;
struct drm_pagemap_zdd;
struct device;
@@ -130,14 +131,17 @@ struct drm_pagemap_ops {
* used for device p2p handshaking.
* @ops: The struct drm_pagemap_ops.
* @ref: Reference count.
- * @dev: The struct drevice owning the device-private memory.
+ * @drm: The struct drm device owning the device-private memory.
* @pagemap: Pointer to the underlying dev_pagemap.
+ * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
+ * device referencing.
*/
struct drm_pagemap {
const struct drm_pagemap_ops *ops;
struct kref ref;
- struct device *dev;
+ struct drm_device *drm;
struct dev_pagemap *pagemap;
+ struct drm_pagemap_dev_hold *dev_hold;
};
struct drm_pagemap_devmem;
@@ -206,7 +210,7 @@ struct drm_pagemap_devmem_ops {
unsigned long npages);
};
-struct drm_pagemap *drm_pagemap_create(struct device *dev,
+struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops);
--
2.51.0
* Re: [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
2025-10-25 12:04 ` [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes Thomas Hellström
@ 2025-10-29 0:46 ` Matthew Brost
2025-10-29 14:49 ` Thomas Hellström
0 siblings, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 0:46 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:00PM +0200, Thomas Hellström wrote:
> Even if the drm_pagemap provider has released its reference on
> the drm_pagemap, references may be held by still active pages.
> Ensure that we hold a reference on the provider drm device and
> modules for as long as we might need to use the drm_pagemap ops.
>
Just to make sure I’m understanding this correctly — this is intended to
guard against the devm action [1] running while a device is still
holding references to another device’s pages, right?
[1] https://elixir.bootlin.com/linux/v6.17.5/source/kernel/resource.c#L1993
> Note that in theory, the drm_gpusvm_helper module may be unloaded
> as soon as the final module_put() of the provider driver module is
> executed, so we need to add a module_exit() function that waits
> for the work item executing the module_put() has completed.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/drm_pagemap.c | 101 ++++++++++++++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
> include/drm/drm_pagemap.h | 10 +++-
> 3 files changed, 117 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 173b3ecb07d5..fb18a80d6a1c 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -8,6 +8,7 @@
> #include <linux/pagemap.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_pagemap.h>
> +#include <drm/drm_print.h>
>
> /**
> * DOC: Overview
> @@ -544,16 +545,92 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> return -ENOMEM;
> }
>
> +static void drm_pagemap_dev_unhold_work(struct work_struct *work);
> +static LLIST_HEAD(drm_pagemap_unhold_list);
> +static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
> +
> +/**
> + * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
> + * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
> + * @drm: drm device to put.
> + *
> + * When a struct drm_pagemap is released, we also need to release the
> + * reference it holds on the drm device. However, typically that needs
> + * to be done separately from a system-wide workqueue.
> + * Each time a struct drm_pagemap is initialized
> + * (or re-initialized if cached) therefore allocate a separate
> + * drm_pagemap_dev_hold item, from which we put the drm device and
> + * associated module.
> + */
> +struct drm_pagemap_dev_hold {
> + struct llist_node link;
> + struct drm_device *drm;
> +};
> +
> static void drm_pagemap_release(struct kref *ref)
> {
> struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
> -
> + struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
> +
> + /*
> + * We know the pagemap provider is alive at this point, since
> + * the struct drm_pagemap_dev_hold holds a reference to the
> + * pagemap provider drm_device and its module.
> + */
> + dpagemap->dev_hold = NULL;
> kfree(dpagemap);
> + llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> + schedule_work(&drm_pagemap_work);
> + /*
> + * Here, either the provider device is still alive, since if called from
> + * page_free(), the caller is holding a reference on the dev_pagemap,
> + * or if called from drm_pagemap_put(), the direct caller is still alive.
> + * This ensures we can't race with THIS module unload.
> + */
> +}
> +
> +static void drm_pagemap_dev_unhold_work(struct work_struct *work)
> +{
> + struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
> + struct drm_pagemap_dev_hold *dev_hold, *next;
> +
> + /*
> + * Deferred release of drm_pagemap provider device and module.
> + * THIS module is kept alive during the release by the
> + * flush_work() in the drm_pagemap_exit() function.
> + */
> + llist_for_each_entry_safe(dev_hold, next, node, link) {
> + struct drm_device *drm = dev_hold->drm;
> + struct module *module = drm->driver->fops->owner;
> +
> + drm_dbg(drm, "Releasing reference on provider device and module.\n");
> + drm_dev_put(drm);
> + module_put(module);
> + kfree(dev_hold);
> + }
> +}
> +
> +static struct drm_pagemap_dev_hold *
> +drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_dev_hold *dev_hold;
> + struct drm_device *drm = dpagemap->drm;
> +
> + dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
> + if (!dev_hold)
> + return ERR_PTR(-ENOMEM);
> +
> + init_llist_node(&dev_hold->link);
> + dev_hold->drm = drm;
> + (void)try_module_get(drm->driver->fops->owner);
> + drm_dev_get(drm);
> +
> + return dev_hold;
> }
>
> /**
> * drm_pagemap_create() - Create a struct drm_pagemap.
> - * @dev: Pointer to a struct device providing the device-private memory.
> + * @drm: Pointer to a struct drm_device providing the device-private memory.
> * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
> * @ops: Pointer to the struct drm_pagemap_ops.
> *
> @@ -563,20 +640,28 @@ static void drm_pagemap_release(struct kref *ref)
> * Error pointer on error.
> */
> struct drm_pagemap *
> -drm_pagemap_create(struct device *dev,
> +drm_pagemap_create(struct drm_device *drm,
> struct dev_pagemap *pagemap,
> const struct drm_pagemap_ops *ops)
> {
> struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> + struct drm_pagemap_dev_hold *dev_hold;
>
> if (!dpagemap)
> return ERR_PTR(-ENOMEM);
>
> kref_init(&dpagemap->ref);
> - dpagemap->dev = dev;
> + dpagemap->drm = drm;
> dpagemap->ops = ops;
> dpagemap->pagemap = pagemap;
>
> + dev_hold = drm_pagemap_dev_hold(dpagemap);
> + if (IS_ERR(dev_hold)) {
> + kfree(dpagemap);
> + return ERR_CAST(dev_hold);
> + }
> + dpagemap->dev_hold = dev_hold;
> +
> return dpagemap;
> }
> EXPORT_SYMBOL(drm_pagemap_create);
> @@ -937,3 +1022,11 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> return err;
> }
> EXPORT_SYMBOL(drm_pagemap_populate_mm);
> +
> +static void drm_pagemap_exit(void)
> +{
> + flush_work(&drm_pagemap_work);
> + if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
> + disable_work_sync(&drm_pagemap_work);
> +}
> +module_exit(drm_pagemap_exit);
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 6d2c6c144315..f6ee22da2e95 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -1437,7 +1437,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
> unsigned int order,
> enum dma_data_direction dir)
> {
> - struct device *pgmap_dev = dpagemap->dev;
> + struct device *pgmap_dev = dpagemap->drm->dev;
> enum drm_interconnect_protocol prot;
> dma_addr_t addr;
>
> @@ -1457,6 +1457,14 @@ static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> .populate_mm = xe_drm_pagemap_populate_mm,
> };
>
> +static void xe_devm_release(void *data)
> +{
> + struct xe_vram_region *vr = data;
> +
> + drm_pagemap_put(vr->dpagemap);
> + vr->dpagemap = NULL;
> +}
> +
> /**
> * xe_devm_add: Remap and provide memmap backing for device memory
> * @tile: tile that the memory region belongs to
> @@ -1482,7 +1490,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> return ret;
> }
>
> - vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> + vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
> &xe_drm_pagemap_ops);
> if (IS_ERR(vr->dpagemap)) {
> drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
> @@ -1490,6 +1498,9 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> ret = PTR_ERR(vr->dpagemap);
> goto out_no_dpagemap;
> }
> + ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
> + if (ret)
> + goto out_no_dpagemap;
I mentioned in the first patch that this was missing; maybe move this
part to the first patch even though it will get removed a bit later.
Matt
>
> vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> vr->pagemap.range.start = res->start;
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index 2c7de928865b..5cfe54331ba7 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -9,6 +9,7 @@
> #define NR_PAGES(order) (1U << (order))
>
> struct drm_pagemap;
> +struct drm_pagemap_dev_hold;
> struct drm_pagemap_zdd;
> struct device;
>
> @@ -130,14 +131,17 @@ struct drm_pagemap_ops {
> * used for device p2p handshaking.
> * @ops: The struct drm_pagemap_ops.
> * @ref: Reference count.
> - * @dev: The struct drevice owning the device-private memory.
> + * @drm: The struct drm device owning the device-private memory.
> * @pagemap: Pointer to the underlying dev_pagemap.
> + * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> + * device referencing.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> struct kref ref;
> - struct device *dev;
> + struct drm_device *drm;
> struct dev_pagemap *pagemap;
> + struct drm_pagemap_dev_hold *dev_hold;
> };
>
> struct drm_pagemap_devmem;
> @@ -206,7 +210,7 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> -struct drm_pagemap *drm_pagemap_create(struct device *dev,
> +struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> struct dev_pagemap *pagemap,
> const struct drm_pagemap_ops *ops);
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
2025-10-29 0:46 ` Matthew Brost
@ 2025-10-29 14:49 ` Thomas Hellström
2025-10-30 2:46 ` Matthew Brost
0 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-29 14:49 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 17:46 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:00PM +0200, Thomas Hellström wrote:
> > Even if the drm_pagemap provider has released its reference on
> > the drm_pagemap, references may be held by still active pages.
> > Ensure that we hold a reference on the provider drm device and
> > modules for as long as we might need to use the drm_pagemap ops.
> >
>
> Just to make sure I’m understanding this correctly — this is intended
> to
> guard against the devm action [1] running while a device is still
> holding references to another device’s pages, right?
>
> [1]
> https://elixir.bootlin.com/linux/v6.17.5/source/kernel/resource.c#L1993
Actually removing the dev_pagemap and its region is allowed while
another device holds a reference on the *drm_pagemap*. For example, say
you have two devices, and device 0 executes from the memory of device 1.
Suddenly you feel like offlining / unbinding device 1. When you execute
unbind, the driver evicts all SVM bos and thereby frees all device-
private pages. But device 0 still has a reference to the drm_pagemap,
even if it's unusable: Any VRAM migration trying to use the drm_pagemap
will error with -ENODEV, so depending on how the driver handles that,
it will continue executing out of another memory region. At this point,
without this code, it would've been possible to rmmod the drm_pagemap
provider device module and its drm device would've been freed, so when
drm_pagemap_put() eventually is called, things go boom. So the commit
message is a bit misleading.
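
To make the fallback concrete, here's roughly what I have in mind for
the device 0 side (just a sketch; driver_migrate_to_dpagemap(),
driver_migrate_to_system() and struct driver_range are made-up
placeholders, only drm_pagemap_put() is from this series):

	static int device0_use_remote(struct driver_range *range,
				      struct drm_pagemap **remote)
	{
		int err;

		/* Hypothetical driver helper performing the VRAM migration. */
		err = driver_migrate_to_dpagemap(range, *remote);
		if (err != -ENODEV)
			return err;

		/* Provider was unbound; the drm_pagemap is now unusable. */
		drm_pagemap_put(*remote);
		*remote = NULL;

		/* Keep executing out of another memory region instead. */
		return driver_migrate_to_system(range);
	}
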
In the case where we only have pages left, the last page should be
freed from the device remove callback where bos are evicted. At that
point, the provider drm device is still alive as the devm callbacks
haven't executed yet. Also, an rmmod would typically cause the devm
callbacks to execute, so that should also be safe without this patch. At
least if the page freeing doesn't trigger any async callbacks that
aren't waited on before removal.
So yeah, I need to update the commit message a bit. We should also
craft an IGT that unbinds device 1 while device 0 is executing out of
its memory and verify that execution completes with correct results
anyway.
/Thomas
>
> > Note that in theory, the drm_gpusvm_helper module may be unloaded
> > as soon as the final module_put() of the provider driver module is
> > executed, so we need to add a module_exit() function that waits
> > for the work item executing the module_put() has completed.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/drm_pagemap.c | 101
> > ++++++++++++++++++++++++++++++++--
> > drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
> > include/drm/drm_pagemap.h | 10 +++-
> > 3 files changed, 117 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > b/drivers/gpu/drm/drm_pagemap.c
> > index 173b3ecb07d5..fb18a80d6a1c 100644
> > --- a/drivers/gpu/drm/drm_pagemap.c
> > +++ b/drivers/gpu/drm/drm_pagemap.c
> > @@ -8,6 +8,7 @@
> > #include <linux/pagemap.h>
> > #include <drm/drm_drv.h>
> > #include <drm/drm_pagemap.h>
> > +#include <drm/drm_print.h>
> >
> > /**
> > * DOC: Overview
> > @@ -544,16 +545,92 @@ static int
> > drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> > return -ENOMEM;
> > }
> >
> > +static void drm_pagemap_dev_unhold_work(struct work_struct *work);
> > +static LLIST_HEAD(drm_pagemap_unhold_list);
> > +static DECLARE_WORK(drm_pagemap_work,
> > drm_pagemap_dev_unhold_work);
> > +
> > +/**
> > + * struct drm_pagemap_dev_hold - Struct to aid in drm_device
> > release.
> > + * @link: Link into drm_pagemap_unhold_list for deferred reference
> > releases.
> > + * @drm: drm device to put.
> > + *
> > + * When a struct drm_pagemap is released, we also need to release
> > the
> > + * reference it holds on the drm device. However, typically that
> > needs
> > + * to be done separately from a system-wide workqueue.
> > + * Each time a struct drm_pagemap is initialized
> > + * (or re-initialized if cached) therefore allocate a separate
> > + * drm_pagemap_dev_hold item, from which we put the drm device and
> > + * associated module.
> > + */
> > +struct drm_pagemap_dev_hold {
> > + struct llist_node link;
> > + struct drm_device *drm;
> > +};
> > +
> > static void drm_pagemap_release(struct kref *ref)
> > {
> > struct drm_pagemap *dpagemap = container_of(ref,
> > typeof(*dpagemap), ref);
> > -
> > + struct drm_pagemap_dev_hold *dev_hold = dpagemap-
> > >dev_hold;
> > +
> > + /*
> > + * We know the pagemap provider is alive at this point,
> > since
> > + * the struct drm_pagemap_dev_hold holds a reference to
> > the
> > + * pagemap provider drm_device and its module.
> > + */
> > + dpagemap->dev_hold = NULL;
> > kfree(dpagemap);
> > + llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> > + schedule_work(&drm_pagemap_work);
> > + /*
> > + * Here, either the provider device is still alive, since
> > if called from
> > + * page_free(), the caller is holding a reference on the
> > dev_pagemap,
> > + * or if called from drm_pagemap_put(), the direct caller
> > is still alive.
> > + * This ensures we can't race with THIS module unload.
> > + */
> > +}
> > +
> > +static void drm_pagemap_dev_unhold_work(struct work_struct *work)
> > +{
> > + struct llist_node *node =
> > llist_del_all(&drm_pagemap_unhold_list);
> > + struct drm_pagemap_dev_hold *dev_hold, *next;
> > +
> > + /*
> > + * Deferred release of drm_pagemap provider device and
> > module.
> > + * THIS module is kept alive during the release by the
> > + * flush_work() in the drm_pagemap_exit() function.
> > + */
> > + llist_for_each_entry_safe(dev_hold, next, node, link) {
> > + struct drm_device *drm = dev_hold->drm;
> > + struct module *module = drm->driver->fops->owner;
> > +
> > + drm_dbg(drm, "Releasing reference on provider
> > device and module.\n");
> > + drm_dev_put(drm);
> > + module_put(module);
> > + kfree(dev_hold);
> > + }
> > +}
> > +
> > +static struct drm_pagemap_dev_hold *
> > +drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> > +{
> > + struct drm_pagemap_dev_hold *dev_hold;
> > + struct drm_device *drm = dpagemap->drm;
> > +
> > + dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
> > + if (!dev_hold)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + init_llist_node(&dev_hold->link);
> > + dev_hold->drm = drm;
> > + (void)try_module_get(drm->driver->fops->owner);
> > + drm_dev_get(drm);
> > +
> > + return dev_hold;
> > }
> >
> > /**
> > * drm_pagemap_create() - Create a struct drm_pagemap.
> > - * @dev: Pointer to a struct device providing the device-private
> > memory.
> > + * @drm: Pointer to a struct drm_device providing the device-
> > private memory.
> > * @pagemap: Pointer to a pre-setup struct dev_pagemap providing
> > the struct pages.
> > * @ops: Pointer to the struct drm_pagemap_ops.
> > *
> > @@ -563,20 +640,28 @@ static void drm_pagemap_release(struct kref
> > *ref)
> > * Error pointer on error.
> > */
> > struct drm_pagemap *
> > -drm_pagemap_create(struct device *dev,
> > +drm_pagemap_create(struct drm_device *drm,
> > struct dev_pagemap *pagemap,
> > const struct drm_pagemap_ops *ops)
> > {
> > struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap),
> > GFP_KERNEL);
> > + struct drm_pagemap_dev_hold *dev_hold;
> >
> > if (!dpagemap)
> > return ERR_PTR(-ENOMEM);
> >
> > kref_init(&dpagemap->ref);
> > - dpagemap->dev = dev;
> > + dpagemap->drm = drm;
> > dpagemap->ops = ops;
> > dpagemap->pagemap = pagemap;
> >
> > + dev_hold = drm_pagemap_dev_hold(dpagemap);
> > + if (IS_ERR(dev_hold)) {
> > + kfree(dpagemap);
> > + return ERR_CAST(dev_hold);
> > + }
> > + dpagemap->dev_hold = dev_hold;
> > +
> > return dpagemap;
> > }
> > EXPORT_SYMBOL(drm_pagemap_create);
> > @@ -937,3 +1022,11 @@ int drm_pagemap_populate_mm(struct
> > drm_pagemap *dpagemap,
> > return err;
> > }
> > EXPORT_SYMBOL(drm_pagemap_populate_mm);
> > +
> > +static void drm_pagemap_exit(void)
> > +{
> > + flush_work(&drm_pagemap_work);
> > + if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
> > + disable_work_sync(&drm_pagemap_work);
> > +}
> > +module_exit(drm_pagemap_exit);
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index 6d2c6c144315..f6ee22da2e95 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -1437,7 +1437,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap
> > *dpagemap,
> > unsigned int order,
> > enum dma_data_direction dir)
> > {
> > - struct device *pgmap_dev = dpagemap->dev;
> > + struct device *pgmap_dev = dpagemap->drm->dev;
> > enum drm_interconnect_protocol prot;
> > dma_addr_t addr;
> >
> > @@ -1457,6 +1457,14 @@ static const struct drm_pagemap_ops
> > xe_drm_pagemap_ops = {
> > .populate_mm = xe_drm_pagemap_populate_mm,
> > };
> >
> > +static void xe_devm_release(void *data)
> > +{
> > + struct xe_vram_region *vr = data;
> > +
> > + drm_pagemap_put(vr->dpagemap);
> > + vr->dpagemap = NULL;
> > +}
> > +
> > /**
> > * xe_devm_add: Remap and provide memmap backing for device memory
> > * @tile: tile that the memory region belongs to
> > @@ -1482,7 +1490,7 @@ int xe_devm_add(struct xe_tile *tile, struct
> > xe_vram_region *vr)
> > return ret;
> > }
> >
> > - vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> > + vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
> > &xe_drm_pagemap_ops);
> > if (IS_ERR(vr->dpagemap)) {
> > drm_err(&xe->drm, "Failed to create drm_pagemap
> > tile %d memory: %pe\n",
> > @@ -1490,6 +1498,9 @@ int xe_devm_add(struct xe_tile *tile, struct
> > xe_vram_region *vr)
> > ret = PTR_ERR(vr->dpagemap);
> > goto out_no_dpagemap;
> > }
> > + ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
> > + if (ret)
> > + goto out_no_dpagemap;
>
> I mentioned this in first patch that this was missing, maybe move
> this
> part to the first patch even though this will get removed a bit
> later.
>
> Matt
>
> >
> > vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> > vr->pagemap.range.start = res->start;
> > diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> > index 2c7de928865b..5cfe54331ba7 100644
> > --- a/include/drm/drm_pagemap.h
> > +++ b/include/drm/drm_pagemap.h
> > @@ -9,6 +9,7 @@
> > #define NR_PAGES(order) (1U << (order))
> >
> > struct drm_pagemap;
> > +struct drm_pagemap_dev_hold;
> > struct drm_pagemap_zdd;
> > struct device;
> >
> > @@ -130,14 +131,17 @@ struct drm_pagemap_ops {
> > * used for device p2p handshaking.
> > * @ops: The struct drm_pagemap_ops.
> > * @ref: Reference count.
> > - * @dev: The struct drevice owning the device-private memory.
> > + * @drm: The struct drm device owning the device-private memory.
> > * @pagemap: Pointer to the underlying dev_pagemap.
> > + * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> > + * device referencing.
> > */
> > struct drm_pagemap {
> > const struct drm_pagemap_ops *ops;
> > struct kref ref;
> > - struct device *dev;
> > + struct drm_device *drm;
> > struct dev_pagemap *pagemap;
> > + struct drm_pagemap_dev_hold *dev_hold;
> > };
> >
> > struct drm_pagemap_devmem;
> > @@ -206,7 +210,7 @@ struct drm_pagemap_devmem_ops {
> > unsigned long npages);
> > };
> >
> > -struct drm_pagemap *drm_pagemap_create(struct device *dev,
> > +struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> > struct dev_pagemap
> > *pagemap,
> > const struct
> > drm_pagemap_ops *ops);
> >
> > --
> > 2.51.0
> >
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
2025-10-29 14:49 ` Thomas Hellström
@ 2025-10-30 2:46 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-30 2:46 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Wed, Oct 29, 2025 at 03:49:47PM +0100, Thomas Hellström wrote:
> On Tue, 2025-10-28 at 17:46 -0700, Matthew Brost wrote:
> > On Sat, Oct 25, 2025 at 02:04:00PM +0200, Thomas Hellström wrote:
> > > Even if the drm_pagemap provider has released its reference on
> > > the drm_pagemap, references may be held by still active pages.
> > > Ensure that we hold a reference on the provider drm device and
> > > modules for as long as we might need to use the drm_pagemap ops.
> > >
> >
> > Just to make sure I’m understanding this correctly — this is intended
> > to
> > guard against the devm action [1] running while a device is still
> > holding references to another device’s pages, right?
> >
> > [1]
> > https://elixir.bootlin.com/linux/v6.17.5/source/kernel/resource.c#L1993
>
> Actually removing the dev_pagemap and its region is allowed while
> another device holds a reference on the *drm_pagemap*. For example if
> you have two devices. Device 0 executes from the memory of device 1.
> Suddenly you feel like offlining / unbinding device 1. When you execute
> unbind, the driver evicts all SVM bos and thereby frees all device-
> private pages. But device 0 still has a reference to the drm_pagemap,
> even if it's unusable: Any VRAM migration trying to use the drm_pagemap
> will error with -ENODEV, so depending on how the driver handles that,
> it will continue executing out of another memory region. At this point
> it would've been possible without this code to rmmod the drm_pagemap
> provider device module, and its drm device would've been freed without
> this code, and when drm_pagemap_put() eventually is called, things go
> boom. So the commit message is a bit misleading.
>
> In the case where we only have pages left, the last page should be
> freed from the device remove callback where bos are evicted. At that
> point, the provider drm device is still alive as the devm callbacks
> haven't executed yet. Also a rmmod wold typically cause the devm
> callbacks to execute so that should also be safe without this patch. At
> least if the page freeing doesn't trigger any async callbacks that
> aren't waited on before removal.
>
I believe I get it...
> So yeah, I need to update the commit message a bit. We should also
> craft an IGT that unbinds device 1 while device 0 is executing out of
> its memory and verify that execution completes with correct results
> anyway.
>
Yes, this is a weird corner we certainly should test out.
Matt
> /Thomas
>
>
> >
> > > Note that in theory, the drm_gpusvm_helper module may be unloaded
> > > as soon as the final module_put() of the provider driver module is
> > > executed, so we need to add a module_exit() function that waits
> > > for the work item executing the module_put() has completed.
> > >
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > ---
> > > drivers/gpu/drm/drm_pagemap.c | 101
> > > ++++++++++++++++++++++++++++++++--
> > > drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
> > > include/drm/drm_pagemap.h | 10 +++-
> > > 3 files changed, 117 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > > b/drivers/gpu/drm/drm_pagemap.c
> > > index 173b3ecb07d5..fb18a80d6a1c 100644
> > > --- a/drivers/gpu/drm/drm_pagemap.c
> > > +++ b/drivers/gpu/drm/drm_pagemap.c
> > > @@ -8,6 +8,7 @@
> > > #include <linux/pagemap.h>
> > > #include <drm/drm_drv.h>
> > > #include <drm/drm_pagemap.h>
> > > +#include <drm/drm_print.h>
> > >
> > > /**
> > > * DOC: Overview
> > > @@ -544,16 +545,92 @@ static int
> > > drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
> > > return -ENOMEM;
> > > }
> > >
> > > +static void drm_pagemap_dev_unhold_work(struct work_struct *work);
> > > +static LLIST_HEAD(drm_pagemap_unhold_list);
> > > +static DECLARE_WORK(drm_pagemap_work,
> > > drm_pagemap_dev_unhold_work);
> > > +
> > > +/**
> > > + * struct drm_pagemap_dev_hold - Struct to aid in drm_device
> > > release.
> > > + * @link: Link into drm_pagemap_unhold_list for deferred reference
> > > releases.
> > > + * @drm: drm device to put.
> > > + *
> > > + * When a struct drm_pagemap is released, we also need to release
> > > the
> > > + * reference it holds on the drm device. However, typically that
> > > needs
> > > + * to be done separately from a system-wide workqueue.
> > > + * Each time a struct drm_pagemap is initialized
> > > + * (or re-initialized if cached) therefore allocate a separate
> > > + * drm_pagemap_dev_hold item, from which we put the drm device and
> > > + * associated module.
> > > + */
> > > +struct drm_pagemap_dev_hold {
> > > + struct llist_node link;
> > > + struct drm_device *drm;
> > > +};
> > > +
> > > static void drm_pagemap_release(struct kref *ref)
> > > {
> > > struct drm_pagemap *dpagemap = container_of(ref,
> > > typeof(*dpagemap), ref);
> > > -
> > > + struct drm_pagemap_dev_hold *dev_hold = dpagemap-
> > > >dev_hold;
> > > +
> > > + /*
> > > + * We know the pagemap provider is alive at this point,
> > > since
> > > + * the struct drm_pagemap_dev_hold holds a reference to
> > > the
> > > + * pagemap provider drm_device and its module.
> > > + */
> > > + dpagemap->dev_hold = NULL;
> > > kfree(dpagemap);
> > > + llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> > > + schedule_work(&drm_pagemap_work);
> > > + /*
> > > + * Here, either the provider device is still alive, since
> > > if called from
> > > + * page_free(), the caller is holding a reference on the
> > > dev_pagemap,
> > > + * or if called from drm_pagemap_put(), the direct caller
> > > is still alive.
> > > + * This ensures we can't race with THIS module unload.
> > > + */
> > > +}
> > > +
> > > +static void drm_pagemap_dev_unhold_work(struct work_struct *work)
> > > +{
> > > + struct llist_node *node =
> > > llist_del_all(&drm_pagemap_unhold_list);
> > > + struct drm_pagemap_dev_hold *dev_hold, *next;
> > > +
> > > + /*
> > > + * Deferred release of drm_pagemap provider device and
> > > module.
> > > + * THIS module is kept alive during the release by the
> > > + * flush_work() in the drm_pagemap_exit() function.
> > > + */
> > > + llist_for_each_entry_safe(dev_hold, next, node, link) {
> > > + struct drm_device *drm = dev_hold->drm;
> > > + struct module *module = drm->driver->fops->owner;
> > > +
> > > + drm_dbg(drm, "Releasing reference on provider
> > > device and module.\n");
> > > + drm_dev_put(drm);
> > > + module_put(module);
> > > + kfree(dev_hold);
> > > + }
> > > +}
> > > +
> > > +static struct drm_pagemap_dev_hold *
> > > +drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> > > +{
> > > + struct drm_pagemap_dev_hold *dev_hold;
> > > + struct drm_device *drm = dpagemap->drm;
> > > +
> > > + dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
> > > + if (!dev_hold)
> > > + return ERR_PTR(-ENOMEM);
> > > +
> > > + init_llist_node(&dev_hold->link);
> > > + dev_hold->drm = drm;
> > > + (void)try_module_get(drm->driver->fops->owner);
> > > + drm_dev_get(drm);
> > > +
> > > + return dev_hold;
> > > }
> > >
> > > /**
> > > * drm_pagemap_create() - Create a struct drm_pagemap.
> > > - * @dev: Pointer to a struct device providing the device-private
> > > memory.
> > > + * @drm: Pointer to a struct drm_device providing the device-
> > > private memory.
> > > * @pagemap: Pointer to a pre-setup struct dev_pagemap providing
> > > the struct pages.
> > > * @ops: Pointer to the struct drm_pagemap_ops.
> > > *
> > > @@ -563,20 +640,28 @@ static void drm_pagemap_release(struct kref
> > > *ref)
> > > * Error pointer on error.
> > > */
> > > struct drm_pagemap *
> > > -drm_pagemap_create(struct device *dev,
> > > +drm_pagemap_create(struct drm_device *drm,
> > > struct dev_pagemap *pagemap,
> > > const struct drm_pagemap_ops *ops)
> > > {
> > > struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap),
> > > GFP_KERNEL);
> > > + struct drm_pagemap_dev_hold *dev_hold;
> > >
> > > if (!dpagemap)
> > > return ERR_PTR(-ENOMEM);
> > >
> > > kref_init(&dpagemap->ref);
> > > - dpagemap->dev = dev;
> > > + dpagemap->drm = drm;
> > > dpagemap->ops = ops;
> > > dpagemap->pagemap = pagemap;
> > >
> > > + dev_hold = drm_pagemap_dev_hold(dpagemap);
> > > + if (IS_ERR(dev_hold)) {
> > > + kfree(dpagemap);
> > > + return ERR_CAST(dev_hold);
> > > + }
> > > + dpagemap->dev_hold = dev_hold;
> > > +
> > > return dpagemap;
> > > }
> > > EXPORT_SYMBOL(drm_pagemap_create);
> > > @@ -937,3 +1022,11 @@ int drm_pagemap_populate_mm(struct
> > > drm_pagemap *dpagemap,
> > > return err;
> > > }
> > > EXPORT_SYMBOL(drm_pagemap_populate_mm);
> > > +
> > > +static void drm_pagemap_exit(void)
> > > +{
> > > + flush_work(&drm_pagemap_work);
> > > + if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
> > > + disable_work_sync(&drm_pagemap_work);
> > > +}
> > > +module_exit(drm_pagemap_exit);
> > > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > > b/drivers/gpu/drm/xe/xe_svm.c
> > > index 6d2c6c144315..f6ee22da2e95 100644
> > > --- a/drivers/gpu/drm/xe/xe_svm.c
> > > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > > @@ -1437,7 +1437,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap
> > > *dpagemap,
> > > unsigned int order,
> > > enum dma_data_direction dir)
> > > {
> > > - struct device *pgmap_dev = dpagemap->dev;
> > > + struct device *pgmap_dev = dpagemap->drm->dev;
> > > enum drm_interconnect_protocol prot;
> > > dma_addr_t addr;
> > >
> > > @@ -1457,6 +1457,14 @@ static const struct drm_pagemap_ops
> > > xe_drm_pagemap_ops = {
> > > .populate_mm = xe_drm_pagemap_populate_mm,
> > > };
> > >
> > > +static void xe_devm_release(void *data)
> > > +{
> > > + struct xe_vram_region *vr = data;
> > > +
> > > + drm_pagemap_put(vr->dpagemap);
> > > + vr->dpagemap = NULL;
> > > +}
> > > +
> > > /**
> > > * xe_devm_add: Remap and provide memmap backing for device memory
> > > * @tile: tile that the memory region belongs to
> > > @@ -1482,7 +1490,7 @@ int xe_devm_add(struct xe_tile *tile, struct
> > > xe_vram_region *vr)
> > > return ret;
> > > }
> > >
> > > - vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
> > > + vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
> > > &xe_drm_pagemap_ops);
> > > if (IS_ERR(vr->dpagemap)) {
> > > drm_err(&xe->drm, "Failed to create drm_pagemap
> > > tile %d memory: %pe\n",
> > > @@ -1490,6 +1498,9 @@ int xe_devm_add(struct xe_tile *tile, struct
> > > xe_vram_region *vr)
> > > ret = PTR_ERR(vr->dpagemap);
> > > goto out_no_dpagemap;
> > > }
> > > + ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
> > > + if (ret)
> > > + goto out_no_dpagemap;
> >
> > I mentioned this in first patch that this was missing, maybe move
> > this
> > part to the first patch even though this will get removed a bit
> > later.
> >
> > Matt
> >
> > >
> > > vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> > > vr->pagemap.range.start = res->start;
> > > diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> > > index 2c7de928865b..5cfe54331ba7 100644
> > > --- a/include/drm/drm_pagemap.h
> > > +++ b/include/drm/drm_pagemap.h
> > > @@ -9,6 +9,7 @@
> > > #define NR_PAGES(order) (1U << (order))
> > >
> > > struct drm_pagemap;
> > > +struct drm_pagemap_dev_hold;
> > > struct drm_pagemap_zdd;
> > > struct device;
> > >
> > > @@ -130,14 +131,17 @@ struct drm_pagemap_ops {
> > > * used for device p2p handshaking.
> > > * @ops: The struct drm_pagemap_ops.
> > > * @ref: Reference count.
> > > - * @dev: The struct drevice owning the device-private memory.
> > > + * @drm: The struct drm device owning the device-private memory.
> > > * @pagemap: Pointer to the underlying dev_pagemap.
> > > + * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> > > + * device referencing.
> > > */
> > > struct drm_pagemap {
> > > const struct drm_pagemap_ops *ops;
> > > struct kref ref;
> > > - struct device *dev;
> > > + struct drm_device *drm;
> > > struct dev_pagemap *pagemap;
> > > + struct drm_pagemap_dev_hold *dev_hold;
> > > };
> > >
> > > struct drm_pagemap_devmem;
> > > @@ -206,7 +210,7 @@ struct drm_pagemap_devmem_ops {
> > > unsigned long npages);
> > > };
> > >
> > > -struct drm_pagemap *drm_pagemap_create(struct device *dev,
> > > +struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> > > struct dev_pagemap
> > > *pagemap,
> > > const struct
> > > drm_pagemap_ops *ops);
> > >
> > > --
> > > 2.51.0
> > >
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (2 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-28 1:23 ` Matthew Brost
` (2 more replies)
2025-10-25 12:04 ` [PATCH 05/15] drm/xe: Use the " Thomas Hellström
` (10 subsequent siblings)
14 siblings, 3 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Pagemaps are costly to set up and tear down, and they consume a lot
of system memory for the struct pages. Ideally they should be
created only when needed.
Add a caching mechanism to allow doing just that: Create the drm_pagemaps
when needed for migration. Keep them around to avoid destruction and
re-creation latencies and destroy inactive/unused drm_pagemaps on memory
pressure using a shrinker.
Only add the helper functions. They will be hooked up to the xe driver
in the upcoming patch.
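
As a rough sketch of the driver-side setup these helpers enable (not
part of this patch; error handling is trimmed, and the lookup side is
shown in the new DOC: Cache lookup section):

	struct drm_pagemap_shrinker *shrinker;
	struct drm_pagemap_cache *cache;

	shrinker = drm_pagemap_shrinker_create_devm(drm);
	if (IS_ERR(shrinker))
		return PTR_ERR(shrinker);

	/* For example, one cache per device-private memory region. */
	cache = drm_pagemap_cache_create_devm(shrinker);
	if (IS_ERR(cache))
		return PTR_ERR(cache);

Dropping the last drm_pagemap reference then parks the pagemap on the
shrinker's list rather than freeing it, from where a later lookup can
revive it or the shrinker can destroy it on memory pressure.
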
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/Makefile | 3 +-
drivers/gpu/drm/drm_pagemap.c | 79 +++++-
drivers/gpu/drm/drm_pagemap_util.c | 426 +++++++++++++++++++++++++++++
include/drm/drm_pagemap.h | 53 +++-
include/drm/drm_pagemap_util.h | 25 ++
5 files changed, 569 insertions(+), 17 deletions(-)
create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
create mode 100644 include/drm/drm_pagemap_util.h
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index c2672f369aed..cdca68fd9f23 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
drm_gpusvm_helper-y := \
drm_gpusvm.o\
- drm_pagemap.o
+ drm_pagemap.o\
+ drm_pagemap_util.o
obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index fb18a80d6a1c..5ca5b2b53bc1 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
#include <drm/drm_print.h>
/**
@@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref *ref)
* pagemap provider drm_device and its module.
*/
dpagemap->dev_hold = NULL;
- kfree(dpagemap);
+ drm_pagemap_shrinker_add(dpagemap);
llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
schedule_work(&drm_pagemap_work);
/*
@@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
return dev_hold;
}
+/**
+ * drm_pagemap_reinit() - Reinitialize a drm_pagemap
+ * @dpagemap: The drm_pagemap to reinitialize
+ *
+ * Reinitialize a drm_pagemap, for which drm_pagemap_release
+ * has already been called. This interface is intended for the
+ * situation where the driver caches a destroyed drm_pagemap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
+{
+ dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
+ if (IS_ERR(dpagemap->dev_hold))
+ return PTR_ERR(dpagemap->dev_hold);
+
+ kref_init(&dpagemap->ref);
+ return 0;
+}
+EXPORT_SYMBOL(drm_pagemap_reinit);
+
+/**
+ * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
+ * @dpagemap: The drm_pagemap to initialize.
+ * @pagemap: The associated dev_pagemap providing the device
+ * private pages.
+ * @drm: The drm device. The drm_pagemap holds a reference on the
+ * drm_device and the module owning the drm_device until
+ * drm_pagemap_release(). This facilitates drm_pagemap exporting.
+ * @ops: The drm_pagemap ops.
+ *
+ * Initialize and take an initial reference on a drm_pagemap.
+ * After successful return, use drm_pagemap_put() to destroy.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int drm_pagemap_init(struct drm_pagemap *dpagemap,
+ struct dev_pagemap *pagemap,
+ struct drm_device *drm,
+ const struct drm_pagemap_ops *ops)
+{
+ kref_init(&dpagemap->ref);
+ dpagemap->ops = ops;
+ dpagemap->pagemap = pagemap;
+ dpagemap->drm = drm;
+ dpagemap->cache = NULL;
+ INIT_LIST_HEAD(&dpagemap->shrink_link);
+
+ return drm_pagemap_reinit(dpagemap);
+}
+EXPORT_SYMBOL(drm_pagemap_init);
+
/**
* drm_pagemap_create() - Create a struct drm_pagemap.
* @drm: Pointer to a struct drm_device providing the device-private memory.
@@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
const struct drm_pagemap_ops *ops)
{
struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
- struct drm_pagemap_dev_hold *dev_hold;
+ int err;
if (!dpagemap)
return ERR_PTR(-ENOMEM);
- kref_init(&dpagemap->ref);
- dpagemap->drm = drm;
- dpagemap->ops = ops;
- dpagemap->pagemap = pagemap;
-
- dev_hold = drm_pagemap_dev_hold(dpagemap);
- if (IS_ERR(dev_hold)) {
- kfree(dpagemap);
- return ERR_CAST(dev_hold);
- }
- dpagemap->dev_hold = dev_hold;
+ err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
+ if (err)
+ return ERR_PTR(err);
return dpagemap;
}
@@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);
+void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
+{
+ if (dpagemap->ops->destroy)
+ dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
+ else
+ kfree(dpagemap);
+}
+
static void drm_pagemap_exit(void)
{
flush_work(&drm_pagemap_work);
diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
new file mode 100644
index 000000000000..e1a1d6bf25f4
--- /dev/null
+++ b/drivers/gpu/drm/drm_pagemap_util.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
+#include <drm/drm_print.h>
+
+/**
+ * struct drm_pagemap_cache - Lookup structure for pagemaps
+ *
+ * Structure to keep track of active (refcount > 0) and inactive
+ * (refcount == 0) pagemaps. Inactive pagemaps can be made active
+ * again by waiting for the @queued completion (indicating that the
+ * pagemap has been put on the @shrinker's list of shrinkable
+ * pagemaps), and then successfully removing it from @shrinker's
+ * list. The latter may fail if the shrinker is already in the
+ * process of freeing the pagemap. A struct drm_pagemap_cache can
+ * hold a single struct drm_pagemap.
+ */
+struct drm_pagemap_cache {
+ /** @lookup_mutex: Mutex making the lookup process atomic */
+ struct mutex lookup_mutex;
+ /** @lock: Lock protecting the @dpagemap pointer */
+ spinlock_t lock;
+ /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */
+ struct drm_pagemap_shrinker *shrinker;
+ /** @dpagemap: Non-refcounted pointer to the drm_pagemap */
+ struct drm_pagemap *dpagemap;
+ /**
+ * @queued: Signals when an inactive drm_pagemap has been put on
+ * @shrinker's list.
+ */
+ struct completion queued;
+};
+
+/**
+ * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps
+ */
+struct drm_pagemap_shrinker {
+ /** @drm: Pointer to the drm device. */
+ struct drm_device *drm;
+ /** @lock: Spinlock to protect the @dpagemaps list. */
+ spinlock_t lock;
+ /** @dpagemaps: List of unused dpagemaps. */
+ struct list_head dpagemaps;
+ /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */
+ atomic_t num_dpagemaps;
+ /** @shrink: Pointer to the struct shrinker. */
+ struct shrinker *shrink;
+};
+
+static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap);
+
+static void drm_pagemap_cache_fini(void *arg)
+{
+ struct drm_pagemap_cache *cache = arg;
+ struct drm_pagemap *dpagemap;
+
+ drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n");
+ spin_lock(&cache->lock);
+ dpagemap = cache->dpagemap;
+ if (!dpagemap) {
+ spin_unlock(&cache->lock);
+ goto out;
+ }
+
+ if (drm_pagemap_shrinker_cancel(dpagemap)) {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ drm_pagemap_destroy(dpagemap, false);
+ }
+
+out:
+ mutex_destroy(&cache->lookup_mutex);
+ kfree(cache);
+}
+
+/**
+ * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
+ * @shrinker: Pointer to a struct drm_pagemap_shrinker.
+ *
+ * Create a device-managed drm_pagemap cache. The cache is automatically
+ * destroyed on struct device removal, at which point any *inactive*
+ * drm_pagemaps are destroyed.
+ *
+ * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer
+ * on failure.
+ */
+struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker)
+{
+ struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ int err;
+
+ if (!cache)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&cache->lookup_mutex);
+ spin_lock_init(&cache->lock);
+ cache->shrinker = shrinker;
+ init_completion(&cache->queued);
+ err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache);
+ if (err)
+ return ERR_PTR(err);
+
+ return cache;
+}
+EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
+
+/**
+ * DOC: Cache lookup
+ *
+ * Cache lookup should be done under a locked mutex, so that a
+ * failed drm_pagemap_get_from_cache() and a following
+ * drm_pagemap_cache_set_pagemap() are carried out as an atomic
+ * operation WRT other lookups. Otherwise, racing lookups may
+ * unnecessarily concurrently create pagemaps to fulfill a
+ * failed lookup. The API provides two functions to take and drop this lock,
+ * drm_pagemap_cache_lock_lookup() and drm_pagemap_cache_unlock_lookup(),
+ * and they should be used in the following way:
+ *
+ * .. code-block:: c
+ *
+ * drm_pagemap_cache_lock_lookup(cache);
+ * dpagemap = drm_pagemap_get_from_cache(cache);
+ * if (dpagemap)
+ * goto out_unlock;
+ *
+ * dpagemap = driver_create_new_dpagemap();
+ * if (!IS_ERR(dpagemap))
+ * drm_pagemap_cache_set_pagemap(cache, dpagemap);
+ *
+ * out_unlock:
+ * drm_pagemap_cache_unlock_lookup(cache);
+ */
+
+/**
+ * drm_pagemap_cache_lock_lookup() - Lock a drm_pagemap_cache for lookup
+ * @cache: The drm_pagemap_cache to lock.
+ *
+ * Return: %-EINTR if interrupted while blocking. %0 otherwise.
+ */
+int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
+{
+ return mutex_lock_interruptible(&cache->lookup_mutex);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
+
+/**
+ * drm_pagemap_cache_unlock_lookup() - Unlock a drm_pagemap_cache after lookup
+ * @cache: The drm_pagemap_cache to unlock.
+ */
+void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache)
+{
+ mutex_unlock(&cache->lookup_mutex);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
+
+/**
+ * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
+ * @cache: The cache used for lookup.
+ *
+ * If an active pagemap is present in the cache, it is immediately returned.
+ * If an inactive pagemap is present, it's removed from the shrinker list and
+ * an attempt is made to make it active.
+ * If no pagemap is present or the attempt to make it active fails, %NULL is returned
+ * to indicate to the caller to create a new drm_pagemap and insert it into
+ * the cache.
+ *
+ * Return: A reference-counted pointer to a drm_pagemap if successful. An error
+ * pointer if an error occurred, or %NULL if no drm_pagemap was found and
+ * the caller should insert a new one.
+ */
+struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache)
+{
+ struct drm_pagemap *dpagemap;
+ int err;
+
+ lockdep_assert_held(&cache->lookup_mutex);
+retry:
+ spin_lock(&cache->lock);
+ dpagemap = cache->dpagemap;
+ if (drm_pagemap_get_unless_zero(dpagemap)) {
+ spin_unlock(&cache->lock);
+ return dpagemap;
+ }
+
+ if (!dpagemap) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ if (!try_wait_for_completion(&cache->queued)) {
+ spin_unlock(&cache->lock);
+ err = wait_for_completion_interruptible(&cache->queued);
+ if (err)
+ return ERR_PTR(err);
+ goto retry;
+ }
+
+ if (drm_pagemap_shrinker_cancel(dpagemap)) {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ err = drm_pagemap_reinit(dpagemap);
+ if (err) {
+ drm_pagemap_destroy(dpagemap, false);
+ return ERR_PTR(err);
+ }
+ drm_pagemap_cache_set_pagemap(cache, dpagemap);
+ } else {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ dpagemap = NULL;
+ }
+
+ return dpagemap;
+}
+EXPORT_SYMBOL(drm_pagemap_get_from_cache);
+
+/**
+ * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache
+ * @cache: The cache to assign the drm_pagemap to.
+ * @dpagemap: The drm_pagemap to assign.
+ *
+ * The function must be called to populate a drm_pagemap_cache only
+ * after a call to drm_pagemap_get_from_cache() returns NULL.
+ */
+void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap)
+{
+ struct drm_device *drm = dpagemap->drm;
+
+ lockdep_assert_held(&cache->lookup_mutex);
+ spin_lock(&cache->lock);
+ dpagemap->cache = cache;
+ swap(cache->dpagemap, dpagemap);
+ reinit_completion(&cache->queued);
+ spin_unlock(&cache->lock);
+ drm_WARN_ON(drm, !!dpagemap);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
+
+/**
+ * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps
+ * @cache: The cache to lookup from.
+ *
+ * Function that should be used to lookup a drm_pagemap that is already active.
+ * (refcount > 0).
+ *
+ * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise.
+ */
+struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache)
+{
+ struct drm_pagemap *dpagemap;
+
+ spin_lock(&cache->lock);
+ dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
+ spin_unlock(&cache->lock);
+
+ return dpagemap;
+}
+EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
+
+static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_cache *cache = dpagemap->cache;
+ struct drm_pagemap_shrinker *shrinker = cache->shrinker;
+
+ spin_lock(&shrinker->lock);
+ if (list_empty(&dpagemap->shrink_link)) {
+ spin_unlock(&shrinker->lock);
+ return false;
+ }
+
+ list_del_init(&dpagemap->shrink_link);
+ atomic_dec(&shrinker->num_dpagemaps);
+ spin_unlock(&shrinker->lock);
+ return true;
+}
+
+/**
+ * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy
+ * @dpagemap: The drm_pagemap.
+ *
+ * If @dpagemap is associated with a &struct drm_pagemap_cache AND the
+ * struct device backing the drm device is still alive, add @dpagemap to
+ * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps.
+ *
+ * Otherwise destroy the pagemap directly using drm_pagemap_destroy().
+ *
+ * This is an internal function which is not intended to be exposed to drivers.
+ */
+void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_cache *cache;
+ struct drm_pagemap_shrinker *shrinker;
+ int idx;
+
+ /*
+ * The pagemap cache and shrinker are disabled at
+ * pci device remove time. After that, dpagemaps
+ * are freed directly.
+ */
+ if (!drm_dev_enter(dpagemap->drm, &idx))
+ goto out_no_cache;
+
+ cache = dpagemap->cache;
+ if (!cache) {
+ drm_dev_exit(idx);
+ goto out_no_cache;
+ }
+
+ shrinker = cache->shrinker;
+ spin_lock(&shrinker->lock);
+ list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
+ atomic_inc(&shrinker->num_dpagemaps);
+ spin_unlock(&shrinker->lock);
+ complete_all(&cache->queued);
+ drm_dev_exit(idx);
+ return;
+
+out_no_cache:
+ drm_pagemap_destroy(dpagemap, true);
+}
+
+static unsigned long
+drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct drm_pagemap_shrinker *shrinker = shrink->private_data;
+ unsigned long count = atomic_read(&shrinker->num_dpagemaps);
+
+ return count ? : SHRINK_EMPTY;
+}
+
+static unsigned long
+drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct drm_pagemap_shrinker *shrinker = shrink->private_data;
+ struct drm_pagemap *dpagemap;
+ struct drm_pagemap_cache *cache;
+ unsigned long nr_freed = 0;
+
+ sc->nr_scanned = 0;
+ spin_lock(&shrinker->lock);
+ do {
+ dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap),
+ shrink_link);
+ if (!dpagemap)
+ break;
+
+ atomic_dec(&shrinker->num_dpagemaps);
+ list_del_init(&dpagemap->shrink_link);
+ spin_unlock(&shrinker->lock);
+
+ sc->nr_scanned++;
+ nr_freed++;
+
+ cache = dpagemap->cache;
+ spin_lock(&cache->lock);
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+
+ drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap);
+ drm_pagemap_destroy(dpagemap, true);
+ spin_lock(&shrinker->lock);
+ } while (sc->nr_scanned < sc->nr_to_scan);
+ spin_unlock(&shrinker->lock);
+
+ return sc->nr_scanned ? nr_freed : SHRINK_STOP;
+}
+
+static void drm_pagemap_shrinker_fini(void *arg)
+{
+ struct drm_pagemap_shrinker *shrinker = arg;
+
+ drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
+ drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps));
+ shrinker_free(shrinker->shrink);
+ kfree(shrinker);
+}
+
+/**
+ * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker
+ * @drm: The drm device
+ *
+ * Create and register a pagemap shrinker that shrinks unused pagemaps
+ * and thereby reduces memory footprint.
+ * The shrinker is drm_device managed and unregisters itself when
+ * the drm device is removed.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
+{
+ struct drm_pagemap_shrinker *shrinker;
+ struct shrinker *shrink;
+ int err;
+
+ shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
+ if (!shrinker)
+ return ERR_PTR(-ENOMEM);
+
+ shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique);
+ if (!shrink) {
+ kfree(shrinker);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&shrinker->lock);
+ INIT_LIST_HEAD(&shrinker->dpagemaps);
+ shrinker->drm = drm;
+ shrinker->shrink = shrink;
+ shrink->count_objects = drm_pagemap_shrinker_count;
+ shrink->scan_objects = drm_pagemap_shrinker_scan;
+ shrink->private_data = shrinker;
+ shrinker_register(shrink);
+
+ err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker);
+ if (err)
+ return ERR_PTR(err);
+
+ return shrinker;
+}
+EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index 5cfe54331ba7..4b9af5e785c6 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -9,6 +9,7 @@
#define NR_PAGES(order) (1U << (order))
struct drm_pagemap;
+struct drm_pagemap_cache;
struct drm_pagemap_dev_hold;
struct drm_pagemap_zdd;
struct device;
@@ -124,6 +125,25 @@ struct drm_pagemap_ops {
unsigned long start, unsigned long end,
struct mm_struct *mm,
unsigned long timeslice_ms);
+ /**
+ * @destroy: Destroy the drm_pagemap and associated resources.
+ * @dpagemap: The drm_pagemap to destroy.
+ * @is_atomic_or_reclaim: The function may be called from
+ * atomic- or reclaim context.
+ *
+ * The implementation should take care not to attempt to
+ * destroy resources that may already have been destroyed
+ * using devm_ callbacks, since this function may be called
+ * after the underlying struct device has been unbound.
+ * If the implementation defers the execution to a work item
+ * to avoid locking issues, then it must make sure the work
+ * items are flushed before module exit. If the destroy call
+ * happens after the provider's pci_remove() callback has
+ * been executed, a module reference and drm device reference is
+ * held across the destroy callback.
+ */
+ void (*destroy)(struct drm_pagemap *dpagemap,
+ bool is_atomic_or_reclaim);
};
/**
@@ -135,6 +155,10 @@ struct drm_pagemap_ops {
* @pagemap: Pointer to the underlying dev_pagemap.
* @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
* device referencing.
+ * @cache: Back-pointer to the &struct drm_pagemap_cache used for this
+ * &struct drm_pagemap. May be NULL if no cache is used.
+ * @shrink_link: Link into the shrinker's list of drm_pagemaps. Only
+ * used if also using a pagemap cache.
*/
struct drm_pagemap {
const struct drm_pagemap_ops *ops;
@@ -142,6 +166,8 @@ struct drm_pagemap {
struct drm_device *drm;
struct dev_pagemap *pagemap;
struct drm_pagemap_dev_hold *dev_hold;
+ struct drm_pagemap_cache *cache;
+ struct list_head shrink_link;
};
struct drm_pagemap_devmem;
@@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
unsigned long npages);
};
+int drm_pagemap_init(struct drm_pagemap *dpagemap,
+ struct dev_pagemap *pagemap,
+ struct drm_device *drm,
+ const struct drm_pagemap_ops *ops);
+
struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops);
@@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
/**
* drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
- * @dpagemap: Pointer to the struct drm_pagemap.
+ * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
*
- * Return: Pointer to the struct drm_pagemap.
+ * Return: Pointer to the struct drm_pagemap, or NULL.
*/
static inline struct drm_pagemap *
drm_pagemap_get(struct drm_pagemap *dpagemap)
@@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap *dpagemap)
return dpagemap;
}
+/**
+ * drm_pagemap_get_unless_zero() - Obtain a reference on a struct drm_pagemap
+ * unless the current reference count is zero.
+ * @dpagemap: Pointer to the drm_pagemap or NULL.
+ *
+ * Return: A pointer to @dpagemap if the reference count was successfully
+ * incremented. NULL if @dpagemap was NULL, or its refcount was 0.
+ */
+static inline struct drm_pagemap * __must_check
+drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
+{
+ return (dpagemap && kref_get_unless_zero(&dpagemap->ref)) ? dpagemap : NULL;
+}
+
/**
* struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
*
@@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct mm_struct *mm,
unsigned long timeslice_ms);
-#endif
+void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim);
+int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
+#endif
diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
new file mode 100644
index 000000000000..292244d429ee
--- /dev/null
+++ b/include/drm/drm_pagemap_util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef _DRM_PAGEMAP_UTIL_H_
+#define _DRM_PAGEMAP_UTIL_H_
+
+struct drm_device;
+struct drm_pagemap;
+struct drm_pagemap_cache;
+struct drm_pagemap_shrinker;
+
+void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
+
+int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
+
+void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache);
+
+struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
+
+struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker);
+
+struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
+
+void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
+
+struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
+#endif
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
@ 2025-10-28 1:23 ` Matthew Brost
2025-10-28 9:46 ` Thomas Hellström
2025-10-29 22:41 ` Matthew Brost
2025-10-29 22:48 ` Matthew Brost
2 siblings, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 1:23 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> Pagemaps are costly to set up and tear down, and they consume a lot
> of system memory for the struct pages. Ideally they should be
> created only when needed.
>
> Add a caching mechanism to allow doing just that: Create the drm_pagemaps
> when needed for migration. Keep them around to avoid destruction and
> re-creation latencies and destroy inactive/unused drm_pagemaps on memory
> pressure using a shrinker.
>
> Only add the helper functions. They will be hooked up to the xe driver
> in the upcoming patch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/Makefile | 3 +-
> drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> drivers/gpu/drm/drm_pagemap_util.c | 426 +++++++++++++++++++++++++++++
> include/drm/drm_pagemap.h | 53 +++-
> include/drm/drm_pagemap_util.h | 25 ++
> 5 files changed, 569 insertions(+), 17 deletions(-)
> create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> create mode 100644 include/drm/drm_pagemap_util.h
>
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index c2672f369aed..cdca68fd9f23 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
>
> drm_gpusvm_helper-y := \
> drm_gpusvm.o\
> - drm_pagemap.o
> + drm_pagemap.o\
> + drm_pagemap_util.o
> obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
>
> obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index fb18a80d6a1c..5ca5b2b53bc1 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -8,6 +8,7 @@
> #include <linux/pagemap.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> #include <drm/drm_print.h>
>
> /**
> @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref *ref)
> * pagemap provider drm_device and its module.
> */
> dpagemap->dev_hold = NULL;
> - kfree(dpagemap);
> + drm_pagemap_shrinker_add(dpagemap);
> llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> schedule_work(&drm_pagemap_work);
> /*
> @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> return dev_hold;
> }
>
> +/**
> + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> + * @dpagemap: The drm_pagemap to reinitialize
> + *
> + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> + * has already been called. This interface is intended for the
> + * situation where the driver caches a destroyed drm_pagemap.
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> +{
> + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> + if (IS_ERR(dpagemap->dev_hold))
> + return PTR_ERR(dpagemap->dev_hold);
> +
> + kref_init(&dpagemap->ref);
> + return 0;
> +}
> +EXPORT_SYMBOL(drm_pagemap_reinit);
> +
> +/**
> + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> + * @dpagemap: The drm_pagemap to initialize.
> + * @pagemap: The associated dev_pagemap providing the device
> + * private pages.
> + * @drm: The drm device. The drm_pagemap holds a reference on the
> + * drm_device and the module owning the drm_device until
> + * drm_pagemap_release(). This facilitates drm_pagemap exporting.
> + * @ops: The drm_pagemap ops.
> + *
> + * Initialize and take an initial reference on a drm_pagemap.
> + * After successful return, use drm_pagemap_put() to destroy.
> + *
> + ** Return: 0 on success, negative error code on error.
> + */
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops)
> +{
> + kref_init(&dpagemap->ref);
> + dpagemap->ops = ops;
> + dpagemap->pagemap = pagemap;
> + dpagemap->drm = drm;
> + dpagemap->cache = NULL;
> + INIT_LIST_HEAD(&dpagemap->shrink_link);
> +
> + return drm_pagemap_reinit(dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_init);
> +
> /**
> * drm_pagemap_create() - Create a struct drm_pagemap.
> * @drm: Pointer to a struct drm_device providing the device-private memory.
> @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> const struct drm_pagemap_ops *ops)
> {
> struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> - struct drm_pagemap_dev_hold *dev_hold;
> + int err;
>
> if (!dpagemap)
> return ERR_PTR(-ENOMEM);
>
> - kref_init(&dpagemap->ref);
> - dpagemap->drm = drm;
> - dpagemap->ops = ops;
> - dpagemap->pagemap = pagemap;
> -
> - dev_hold = drm_pagemap_dev_hold(dpagemap);
> - if (IS_ERR(dev_hold)) {
> - kfree(dpagemap);
> - return ERR_CAST(dev_hold);
> - }
> - dpagemap->dev_hold = dev_hold;
> + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> + if (err)
> + return ERR_PTR(err);
>
> return dpagemap;
> }
> @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> }
> EXPORT_SYMBOL(drm_pagemap_populate_mm);
>
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
> +{
> + if (dpagemap->ops->destroy)
> + dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
> + else
> + kfree(dpagemap);
> +}
> +
> static void drm_pagemap_exit(void)
> {
> flush_work(&drm_pagemap_work);
> diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
> new file mode 100644
> index 000000000000..e1a1d6bf25f4
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_pagemap_util.c
> @@ -0,0 +1,426 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> +#include <drm/drm_print.h>
> +
> +/**
> + * struct drm_pagemap_cache - Lookup structure for pagemaps
> + *
> + * Structure to keep track of active (refcount > 0) and inactive
> + * (refcount == 0) pagemaps. Inactive pagemaps can be made active
> + * again by waiting for the @queued completion (indicating that the
> + * pagemap has been put on the @shrinker's list of shrinkable
> + * pagemaps) and then successfully removing it from the @shrinker's
> + * list. The latter may fail if the shrinker is already in the
> + * process of freeing the pagemap. A struct drm_pagemap_cache can
> + * hold a single struct drm_pagemap.
> + */
> +struct drm_pagemap_cache {
> + /** @lookup_mutex: Mutex making the lookup process atomic */
> + struct mutex lookup_mutex;
> + /** @lock: Lock protecting the @dpagemap pointer */
> + spinlock_t lock;
> + /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */
> + struct drm_pagemap_shrinker *shrinker;
> + /** @dpagemap: Non-refcounted pointer to the drm_pagemap */
> + struct drm_pagemap *dpagemap;
> + /**
> + * @queued: Signals when an inactive drm_pagemap has been put on
> + * @shrinker's list.
> + */
> + struct completion queued;
> +};
> +
> +/**
> + * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps
> + */
> +struct drm_pagemap_shrinker {
> + /** @drm: Pointer to the drm device. */
> + struct drm_device *drm;
> + /** @lock: Spinlock to protect the @dpagemaps list. */
> + spinlock_t lock;
> + /** @dpagemaps: List of unused dpagemaps. */
> + struct list_head dpagemaps;
> + /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */
> + atomic_t num_dpagemaps;
> + /** @shrink: Pointer to the struct shrinker. */
> + struct shrinker *shrink;
> +};
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap);
> +
> +static void drm_pagemap_cache_fini(void *arg)
> +{
> + struct drm_pagemap_cache *cache = arg;
> + struct drm_pagemap *dpagemap;
> +
> + drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n");
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + goto out;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + drm_pagemap_destroy(dpagemap, false);
> + }
> +
> +out:
> + mutex_destroy(&cache->lookup_mutex);
> + kfree(cache);
> +}
> +
> +/**
> + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> + *
> + * Create a device-managed drm_pagemap cache. The cache is automatically
> + * destroyed on struct device removal, at which point any *inactive*
> + * drm_pagemaps are destroyed.
> + *
> + * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer
> + * on failure.
> + */
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker)
> +{
> + struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
> + int err;
> +
> + if (!cache)
> + return ERR_PTR(-ENOMEM);
> +
> + mutex_init(&cache->lookup_mutex);
> + spin_lock_init(&cache->lock);
> + cache->shrinker = shrinker;
> + init_completion(&cache->queued);
> + err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache);
> + if (err)
> + return ERR_PTR(err);
> +
> + return cache;
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> +
> +/**
> + * DOC: Cache lookup
> + *
> + * Cache lookup should be done under a locked mutex, so that a
> + * failed drm_pagemap_get_from_cache() and a following
> + * drm_pagemap_cache_set_pagemap() are carried out as an atomic
> + * operation WRT other lookups. Otherwise, racing lookups may
> + * unnecessarily concurrently create pagemaps to fulfill a
> + * failed lookup. The API provides two functions to perform this lock,
> + * drm_pagemap_cache_lock_lookup() and drm_pagemap_cache_unlock_lookup(), and they
> + * should be used in the following way:
> + *
> + * .. code-block:: c
> + *
> + * drm_pagemap_cache_lock_lookup(cache);
> + * dpagemap = drm_pagemap_get_from_cache(cache);
> + * if (dpagemap)
> + * goto out_unlock;
> + *
> + * dpagemap = driver_create_new_dpagemap();
> + * if (!IS_ERR(dpagemap))
> + * drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + *
> + * out_unlock:
> + * drm_pagemap_cache_unlock_lookup(cache);
> + */
> +
> +/**
> + * drm_pagemap_cache_lock_lookup() - Lock a drm_pagemap_cache for lookup
> + * @cache: The drm_pagemap_cache to lock.
> + *
> + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> + */
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
> +{
> + return mutex_lock_interruptible(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> +
> +/**
> + * drm_pagemap_cache_unlock_lookup() - Unlock a drm_pagemap_cache after lookup
> + * @cache: The drm_pagemap_cache to unlock.
> + */
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache)
> +{
> + mutex_unlock(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> +
> +/**
> + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
> + * @cache: The cache used for lookup.
> + *
> + * If an active pagemap is present in the cache, it is immediately returned.
> + * If an inactive pagemap is present, it's removed from the shrinker list and
> + * an attempt is made to make it active.
> + * If no pagemap is present, or if the attempt to make it active fails, %NULL is returned
> + * to indicate to the caller to create a new drm_pagemap and insert it into
> + * the cache.
> + *
> + * Return: A reference-counted pointer to a drm_pagemap if successful. An error
> + * pointer if an error occurred, or %NULL if no drm_pagemap was found and
> + * the caller should insert a new one.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache)
> +{
> + struct drm_pagemap *dpagemap;
> + int err;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> +retry:
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (drm_pagemap_get_unless_zero(dpagemap)) {
> + spin_unlock(&cache->lock);
> + return dpagemap;
> + }
> +
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + return NULL;
> + }
> +
> + if (!try_wait_for_completion(&cache->queued)) {
> + spin_unlock(&cache->lock);
> + err = wait_for_completion_interruptible(&cache->queued);
> + if (err)
> + return ERR_PTR(err);
> + goto retry;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + err = drm_pagemap_reinit(dpagemap);
> + if (err) {
> + drm_pagemap_destroy(dpagemap, false);
> + return ERR_PTR(err);
> + }
> + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + } else {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + dpagemap = NULL;
> + }
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> +
> +/**
> + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache
> + * @cache: The cache to assign the drm_pagemap to.
> + * @dpagemap: The drm_pagemap to assign.
> + *
> + * The function must be called to populate a drm_pagemap_cache only
> + * after a call to drm_pagemap_get_from_cache() returns NULL.
> + */
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap)
> +{
> + struct drm_device *drm = dpagemap->drm;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> + spin_lock(&cache->lock);
> + dpagemap->cache = cache;
> + swap(cache->dpagemap, dpagemap);
> + reinit_completion(&cache->queued);
> + spin_unlock(&cache->lock);
> + drm_WARN_ON(drm, !!dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> +
> +/**
> + * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps
> + * @cache: The cache to lookup from.
> + *
> + * Function that should be used to look up a drm_pagemap that is already
> + * active (refcount > 0).
> + *
> + * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache)
> +{
> + struct drm_pagemap *dpagemap;
> +
> + spin_lock(&cache->lock);
> + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> + spin_unlock(&cache->lock);
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_cache *cache = dpagemap->cache;
> + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> +
> + spin_lock(&shrinker->lock);
> + if (list_empty(&dpagemap->shrink_link)) {
> + spin_unlock(&shrinker->lock);
> + return false;
> + }
> +
> + list_del_init(&dpagemap->shrink_link);
> + atomic_dec(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + return true;
> +}
> +
> +/**
> + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy
> + * @dpagemap: The drm_pagemap.
> + *
> + * If @dpagemap is associated with a &struct drm_pagemap_cache AND the
> + * struct device backing the drm device is still alive, add @dpagemap to
> + * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps.
> + *
> + * Otherwise destroy the pagemap directly using drm_pagemap_destroy().
> + *
> + * This is an internal function which is not intended to be exposed to drivers.
> + */
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
Not a full review - slowly wrapping my head around the first 6 patches
but one quick question.
This is called from drm_pagemap_put. How do we know what type of context
we're in? It seems like this could be called from either process context
or atomic context (e.g., via drm_pagemap_zdd_destroy through
drm_pagemap_page_free). This code doesn’t appear to work in atomic
contexts—if I recall correctly, drm_dev_enter can’t be called from
atomic context. Also, we're missing irqsave on the spinlock.
We had a worker for ZDD destroy at one point—should we revive that? If
we did, I think we could safely enforce a rule that drm_pagemap
operations must only be called from process context.
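
To make the question concrete, below is a rough, untested sketch of the
kind of deferral I mean. The shrink_defer_link llist node and the
drm_pagemap_shrink_defer_work() helper are invented names for
illustration only; the idea is to reuse the drm_pagemap_work worker
added earlier in the series so that drm_dev_enter() and the shrinker
spinlock only ever run in process context:

static LLIST_HEAD(drm_pagemap_shrink_defer_list);

/* Called from the existing drm_pagemap_work handler, i.e. process context. */
static void drm_pagemap_shrink_defer_work(void)
{
	struct llist_node *deferred = llist_del_all(&drm_pagemap_shrink_defer_list);
	struct drm_pagemap *dpagemap, *next;

	llist_for_each_entry_safe(dpagemap, next, deferred, shrink_defer_link)
		drm_pagemap_shrinker_add(dpagemap);
}

static void drm_pagemap_release(struct kref *ref)
{
	struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
	struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;

	dpagemap->dev_hold = NULL;
	/* llist_add() is safe from atomic context. */
	llist_add(&dpagemap->shrink_defer_link, &drm_pagemap_shrink_defer_list);
	llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
	schedule_work(&drm_pagemap_work);
}

That way the final put itself stays callable from any context.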
Matt
> +{
> + struct drm_pagemap_cache *cache;
> + struct drm_pagemap_shrinker *shrinker;
> + int idx;
> +
> + /*
> + * The pagemap cache and shrinker are disabled at
> + * pci device remove time. After that, dpagemaps
> + * are freed directly.
> + */
> + if (!drm_dev_enter(dpagemap->drm, &idx))
> + goto out_no_cache;
> +
> + cache = dpagemap->cache;
> + if (!cache) {
> + drm_dev_exit(idx);
> + goto out_no_cache;
> + }
> +
> + shrinker = cache->shrinker;
> + spin_lock(&shrinker->lock);
> + list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
> + atomic_inc(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + complete_all(&cache->queued);
> + drm_dev_exit(idx);
> + return;
> +
> +out_no_cache:
> + drm_pagemap_destroy(dpagemap, true);
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + unsigned long count = atomic_read(&shrinker->num_dpagemaps);
> +
> + return count ? : SHRINK_EMPTY;
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + struct drm_pagemap *dpagemap;
> + struct drm_pagemap_cache *cache;
> + unsigned long nr_freed = 0;
> +
> + sc->nr_scanned = 0;
> + spin_lock(&shrinker->lock);
> + do {
> + dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap),
> + shrink_link);
> + if (!dpagemap)
> + break;
> +
> + atomic_dec(&shrinker->num_dpagemaps);
> + list_del_init(&dpagemap->shrink_link);
> + spin_unlock(&shrinker->lock);
> +
> + sc->nr_scanned++;
> + nr_freed++;
> +
> + cache = dpagemap->cache;
> + spin_lock(&cache->lock);
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> +
> + drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap);
> + drm_pagemap_destroy(dpagemap, true);
> + spin_lock(&shrinker->lock);
> + } while (sc->nr_scanned < sc->nr_to_scan);
> + spin_unlock(&shrinker->lock);
> +
> + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> +}
> +
> +static void drm_pagemap_shrinker_fini(void *arg)
> +{
> + struct drm_pagemap_shrinker *shrinker = arg;
> +
> + drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
> + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps));
> + shrinker_free(shrinker->shrink);
> + kfree(shrinker);
> +}
> +
> +/**
> + * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker
> + * @drm: The drm device
> + *
> + * Create and register a pagemap shrinker that shrinks unused pagemaps
> + * and thereby reduces memory footprint.
> + * The shrinker is drm_device managed and unregisters itself when
> + * the drm device is removed.
> + *
> + * Return: Pointer to a struct drm_pagemap_shrinker on success. Error pointer on failure.
> + */
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> +{
> + struct drm_pagemap_shrinker *shrinker;
> + struct shrinker *shrink;
> + int err;
> +
> + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> + if (!shrinker)
> + return ERR_PTR(-ENOMEM);
> +
> + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique);
> + if (!shrink) {
> + kfree(shrinker);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + spin_lock_init(&shrinker->lock);
> + INIT_LIST_HEAD(&shrinker->dpagemaps);
> + shrinker->drm = drm;
> + shrinker->shrink = shrink;
> + shrink->count_objects = drm_pagemap_shrinker_count;
> + shrink->scan_objects = drm_pagemap_shrinker_scan;
> + shrink->private_data = shrinker;
> + shrinker_register(shrink);
> +
> + err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker);
> + if (err)
> + return ERR_PTR(err);
> +
> + return shrinker;
> +}
> +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index 5cfe54331ba7..4b9af5e785c6 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -9,6 +9,7 @@
> #define NR_PAGES(order) (1U << (order))
>
> struct drm_pagemap;
> +struct drm_pagemap_cache;
> struct drm_pagemap_dev_hold;
> struct drm_pagemap_zdd;
> struct device;
> @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> unsigned long start, unsigned long end,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
> + /**
> + * @destroy: Destroy the drm_pagemap and associated resources.
> + * @dpagemap: The drm_pagemap to destroy.
> + * @is_atomic_or_reclaim: True if the function may be called from
> + * atomic or reclaim context.
> + *
> + * The implementation should take care not to attempt to
> + * destroy resources that may already have been destroyed
> + * using devm_ callbacks, since this function may be called
> + * after the underlying struct device has been unbound.
> + * If the implementation defers the execution to a work item
> + * to avoid locking issues, then it must make sure the work
> + * items are flushed before module exit. If the destroy call
> + * happens after the provider's pci_remove() callback has
> + * been executed, a module reference and drm device reference is
> + * held across the destroy callback.
> + */
> + void (*destroy)(struct drm_pagemap *dpagemap,
> + bool is_atomic_or_reclaim);
> };
>
> /**
> @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> * @pagemap: Pointer to the underlying dev_pagemap.
> * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> * device referencing.
> + * @cache: Back-pointer to the &struct drm_pagemap_cache used for this
> + * &struct drm_pagemap. May be NULL if no cache is used.
> + * @shrink_link: Link into the shrinker's list of drm_pagemaps. Only
> + * used if also using a pagemap cache.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> @@ -142,6 +166,8 @@ struct drm_pagemap {
> struct drm_device *drm;
> struct dev_pagemap *pagemap;
> struct drm_pagemap_dev_hold *dev_hold;
> + struct drm_pagemap_cache *cache;
> + struct list_head shrink_link;
> };
>
> struct drm_pagemap_devmem;
> @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops);
> +
> struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> struct dev_pagemap *pagemap,
> const struct drm_pagemap_ops *ops);
> @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
>
> /**
> * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> - * @dpagemap: Pointer to the struct drm_pagemap.
> + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> *
> - * Return: Pointer to the struct drm_pagemap.
> + * Return: Pointer to the struct drm_pagemap, or NULL.
> */
> static inline struct drm_pagemap *
> drm_pagemap_get(struct drm_pagemap *dpagemap)
> @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap *dpagemap)
> return dpagemap;
> }
>
> +/**
> + * drm_pagemap_get_unless_zero() - Obtain a reference on a struct drm_pagemap
> + * unless the current reference count is zero.
> + * @dpagemap: Pointer to the drm_pagemap or NULL.
> + *
> + * Return: A pointer to @dpagemap if the reference count was successfully
> + * incremented. NULL if @dpagemap was NULL, or its refcount was 0.
> + */
> +static inline struct drm_pagemap * __must_check
> +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> +{
> + return (dpagemap && kref_get_unless_zero(&dpagemap->ref)) ? dpagemap : NULL;
> +}
> +
> /**
> * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
> *
> @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
>
> -#endif
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim);
>
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> +#endif
> diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
> new file mode 100644
> index 000000000000..292244d429ee
> --- /dev/null
> +++ b/include/drm/drm_pagemap_util.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: MIT */
> +#ifndef _DRM_PAGEMAP_UTIL_H_
> +#define _DRM_PAGEMAP_UTIL_H_
> +
> +struct drm_device;
> +struct drm_pagemap;
> +struct drm_pagemap_cache;
> +struct drm_pagemap_shrinker;
> +
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> +
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache);
> +
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> +
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
> +#endif
> --
> 2.51.0
>
* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-28 1:23 ` Matthew Brost
@ 2025-10-28 9:46 ` Thomas Hellström
2025-10-28 10:29 ` Thomas Hellström
0 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-28 9:46 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Mon, 2025-10-27 at 18:23 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> > Pagemaps are costly to set up and tear down, and they consume a lot
> > of system memory for the struct pages. Ideally they should be
> > created only when needed.
> >
> > Add a caching mechanism to allow doing just that: Create the
> > drm_pagemaps
> > when needed for migration. Keep them around to avoid destruction
> > and
> > re-creation latencies and destroy inactive/unused drm_pagemaps on
> > memory
> > pressure using a shrinker.
> >
> > Only add the helper functions. They will be hooked up to the xe
> > driver
> > in the upcoming patch.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/Makefile | 3 +-
> > drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> > drivers/gpu/drm/drm_pagemap_util.c | 426
> > +++++++++++++++++++++++++++++
> > include/drm/drm_pagemap.h | 53 +++-
> > include/drm/drm_pagemap_util.h | 25 ++
> > 5 files changed, 569 insertions(+), 17 deletions(-)
> > create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> > create mode 100644 include/drm/drm_pagemap_util.h
> >
> > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > index c2672f369aed..cdca68fd9f23 100644
> > --- a/drivers/gpu/drm/Makefile
> > +++ b/drivers/gpu/drm/Makefile
> > @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
> >
> > drm_gpusvm_helper-y := \
> > drm_gpusvm.o\
> > - drm_pagemap.o
> > + drm_pagemap.o\
> > + drm_pagemap_util.o
> > obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
> >
> > obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > b/drivers/gpu/drm/drm_pagemap.c
> > index fb18a80d6a1c..5ca5b2b53bc1 100644
> > --- a/drivers/gpu/drm/drm_pagemap.c
> > +++ b/drivers/gpu/drm/drm_pagemap.c
> > @@ -8,6 +8,7 @@
> > #include <linux/pagemap.h>
> > #include <drm/drm_drv.h>
> > #include <drm/drm_pagemap.h>
> > +#include <drm/drm_pagemap_util.h>
> > #include <drm/drm_print.h>
> >
> > /**
> > @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref
> > *ref)
> > * pagemap provider drm_device and its module.
> > */
> > dpagemap->dev_hold = NULL;
> > - kfree(dpagemap);
> > + drm_pagemap_shrinker_add(dpagemap);
> > llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> > schedule_work(&drm_pagemap_work);
> > /*
> > @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap
> > *dpagemap)
> > return dev_hold;
> > }
> >
> > +/**
> > + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> > + * @dpagemap: The drm_pagemap to reinitialize
> > + *
> > + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> > + * has already been called. This interface is intended for the
> > + * situation where the driver caches a destroyed drm_pagemap.
> > + *
> > + * Return: 0 on success, negative error code on failure.
> > + */
> > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> > +{
> > + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> > + if (IS_ERR(dpagemap->dev_hold))
> > + return PTR_ERR(dpagemap->dev_hold);
> > +
> > + kref_init(&dpagemap->ref);
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_reinit);
> > +
> > +/**
> > + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> > + * @dpagemap: The drm_pagemap to initialize.
> > + * @pagemap: The associated dev_pagemap providing the device
> > + * private pages.
> > + * @drm: The drm device. The drm_pagemap holds a reference on the
> > + * drm_device and the module owning the drm_device until
> > + * drm_pagemap_release(). This facilitates drm_pagemap exporting.
> > + * @ops: The drm_pagemap ops.
> > + *
> > + * Initialize and take an initial reference on a drm_pagemap.
> > + * After successful return, use drm_pagemap_put() to destroy.
> > + *
> > + * Return: 0 on success, negative error code on failure.
> > + */
> > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > + struct dev_pagemap *pagemap,
> > + struct drm_device *drm,
> > + const struct drm_pagemap_ops *ops)
> > +{
> > + kref_init(&dpagemap->ref);
> > + dpagemap->ops = ops;
> > + dpagemap->pagemap = pagemap;
> > + dpagemap->drm = drm;
> > + dpagemap->cache = NULL;
> > + INIT_LIST_HEAD(&dpagemap->shrink_link);
> > +
> > + return drm_pagemap_reinit(dpagemap);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_init);
> > +
> > /**
> > * drm_pagemap_create() - Create a struct drm_pagemap.
> > * @drm: Pointer to a struct drm_device providing the device-
> > private memory.
> > @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> > const struct drm_pagemap_ops *ops)
> > {
> > struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap),
> > GFP_KERNEL);
> > - struct drm_pagemap_dev_hold *dev_hold;
> > + int err;
> >
> > if (!dpagemap)
> > return ERR_PTR(-ENOMEM);
> >
> > - kref_init(&dpagemap->ref);
> > - dpagemap->drm = drm;
> > - dpagemap->ops = ops;
> > - dpagemap->pagemap = pagemap;
> > -
> > - dev_hold = drm_pagemap_dev_hold(dpagemap);
> > - if (IS_ERR(dev_hold)) {
> > - kfree(dpagemap);
> > - return ERR_CAST(dev_hold);
> > - }
> > - dpagemap->dev_hold = dev_hold;
> > + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> > + if (err)
> > + return ERR_PTR(err);
> >
> > return dpagemap;
> > }
> > @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct
> > drm_pagemap *dpagemap,
> > }
> > EXPORT_SYMBOL(drm_pagemap_populate_mm);
> >
> > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > is_atomic_or_reclaim)
> > +{
> > + if (dpagemap->ops->destroy)
> > + dpagemap->ops->destroy(dpagemap,
> > is_atomic_or_reclaim);
> > + else
> > + kfree(dpagemap);
> > +}
> > +
> > static void drm_pagemap_exit(void)
> > {
> > flush_work(&drm_pagemap_work);
> > diff --git a/drivers/gpu/drm/drm_pagemap_util.c
> > b/drivers/gpu/drm/drm_pagemap_util.c
> > new file mode 100644
> > index 000000000000..e1a1d6bf25f4
> > --- /dev/null
> > +++ b/drivers/gpu/drm/drm_pagemap_util.c
> > @@ -0,0 +1,426 @@
> > +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> > +/*
> > + * Copyright © 2025 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_drv.h>
> > +#include <drm/drm_managed.h>
> > +#include <drm/drm_pagemap.h>
> > +#include <drm/drm_pagemap_util.h>
> > +#include <drm/drm_print.h>
> > +
> > +/**
> > + * struct drm_pagemap_cache - Lookup structure for pagemaps
> > + *
> > + * Structure to keep track of active (refcount > 1) and inactive
> > + * (refcount == 0) pagemaps. Inactive pagemaps can be made active
> > + * again by waiting for the @queued completion (indicating that
> > the
> > + * pagemap has been put on the @shrinker's list of shrinkable
> > + * pagemaps, and then successfully removing it from @shrinker's
> > + * list. The latter may fail if the shrinker is already in the
> > + * process of freeing the pagemap. A struct drm_pagemap_cache can
> > + * hold a single struct drm_pagemap.
> > + */
> > +struct drm_pagemap_cache {
> > + /** @lookup_mutex: Mutex making the lookup process atomic
> > */
> > + struct mutex lookup_mutex;
> > + /** @lock: Lock protecting the @dpagemap pointer */
> > + spinlock_t lock;
> > + /** @shrinker: Pointer to the shrinker used for this
> > cache. Immutable. */
> > + struct drm_pagemap_shrinker *shrinker;
> > + /** @dpagemap: Non-refcounted pointer to the drm_pagemap
> > */
> > + struct drm_pagemap *dpagemap;
> > + /**
> > + * @queued: Signals when an inactive drm_pagemap has been
> > put on
> > + * @shrinker's list.
> > + */
> > + struct completion queued;
> > +};
> > +
> > +/**
> > + * struct drm_pagemap_shrinker - Shrinker to remove unused
> > pagemaps
> > + */
> > +struct drm_pagemap_shrinker {
> > + /** @drm: Pointer to the drm device. */
> > + struct drm_device *drm;
> > + /** @lock: Spinlock to protect the @dpagemaps list. */
> > + spinlock_t lock;
> > + /** @dpagemaps: List of unused dpagemaps. */
> > + struct list_head dpagemaps;
> > + /** @num_dpagemaps: Number of unused dpagemaps in
> > @dpagemaps. */
> > + atomic_t num_dpagemaps;
> > + /** @shrink: Pointer to the struct shrinker. */
> > + struct shrinker *shrink;
> > +};
> > +
> > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > *dpagemap);
> > +
> > +static void drm_pagemap_cache_fini(void *arg)
> > +{
> > + struct drm_pagemap_cache *cache = arg;
> > + struct drm_pagemap *dpagemap;
> > +
> > + drm_dbg(cache->shrinker->drm, "Destroying dpagemap
> > cache.\n");
> > + spin_lock(&cache->lock);
> > + dpagemap = cache->dpagemap;
> > + if (!dpagemap) {
> > + spin_unlock(&cache->lock);
> > + goto out;
> > + }
> > +
> > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > + cache->dpagemap = NULL;
> > + spin_unlock(&cache->lock);
> > + drm_pagemap_destroy(dpagemap, false);
> > + }
> > +
> > +out:
> > + mutex_destroy(&cache->lookup_mutex);
> > + kfree(cache);
> > +}
> > +
> > +/**
> > + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> > + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> > + *
> > + * Create a device-managed drm_pagemap cache. The cache is
> > automatically
> > + * destroyed on struct device removal, at which point any
> > *inactive*
> > + * drm_pagemap's are destroyed.
> > + *
> > + * Return: Pointer to a struct drm_pagemap_cache on success. Error
> > pointer
> > + * on failure.
> > + */
> > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > drm_pagemap_shrinker *shrinker)
> > +{
> > + struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache),
> > GFP_KERNEL);
> > + int err;
> > +
> > + if (!cache)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + mutex_init(&cache->lookup_mutex);
> > + spin_lock_init(&cache->lock);
> > + cache->shrinker = shrinker;
> > + init_completion(&cache->queued);
> > + err = devm_add_action_or_reset(shrinker->drm->dev,
> > drm_pagemap_cache_fini, cache);
> > + if (err)
> > + return ERR_PTR(err);
> > +
> > + return cache;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> > +
> > +/**
> > + * DOC: Cache lookup
> > + *
> > + * Cache lookup should be done under a locked mutex, so that a
> > + * failed drm_pagemap_get_from_cache() and a following
> > + * drm_pagemap_cache_set_pagemap() are carried out as an atomic
> > + * operation WRT other lookups. Otherwise, racing lookups may
> > + * unnecessarily concurrently create pagemaps to fulfill a
> > + * failed lookup. The API provides two functions to perform this
> > lock,
> > + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and
> > they
> > + * should be used in the following way:
> > + *
> > + * .. code-block:: c
> > + *
> > + * drm_pagemap_lock_lookup(cache);
> > + * dpagemap = drm_pagemap_get_from_cache(cache);
> > + * if (dpagemap)
> > + * goto out_unlock;
> > + *
> > + * dpagemap = driver_create_new_dpagemap();
> > + * if (!IS_ERR(dpagemap))
> > + * drm_pagemap_cache_set_pagemap(cache,
> > dpagemap);
> > + *
> > + * out_unlock:
> > + * drm_pagemap_unlock_lookup(cache);
> > + */
> > +
> > +/**
> > + * drm_pagemap_cache_lock_lookup() Lock a drm_pagemap_cache for
> > lookup
> > + * @cache: The drm_pagemap_cache to lock.
> > + *
> > + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> > + */
> > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
> > +{
> > + return mutex_lock_interruptible(&cache->lookup_mutex);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> > +
> > +/**
> > + * drm_pagemap_cache_unlock_lookup() Unlock a drm_pagemap_cache
> > after lookup
> > + * @cache: The drm_pagemap_cache to unlock.
> > + */
> > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > *cache)
> > +{
> > + mutex_unlock(&cache->lookup_mutex);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> > +
> > +/**
> > + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
> > + * @cache: The cache used for lookup.
> > + *
> > + * If an active pagemap is present in the cache, it is immediately
> > returned.
> > + * If an inactive pagemap is present, it's removed from the
> > shrinker list and
> > + * an attempt is made to make it active.
> > + * If no pagemap present or the attempt to make it active failed,
> > %NULL is returned
> > + * to indicate to the caller to create a new drm_pagemap and
> > insert it into
> > + * the cache.
> > + *
> > + * Return: A reference-counted pointer to a drm_pagemap if
> > successful. An error
> > + * pointer if an error occurred, or %NULL if no drm_pagemap was
> > found and
> > + * the caller should insert a new one.
> > + */
> > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > drm_pagemap_cache *cache)
> > +{
> > + struct drm_pagemap *dpagemap;
> > + int err;
> > +
> > + lockdep_assert_held(&cache->lookup_mutex);
> > +retry:
> > + spin_lock(&cache->lock);
> > + dpagemap = cache->dpagemap;
> > + if (drm_pagemap_get_unless_zero(dpagemap)) {
> > + spin_unlock(&cache->lock);
> > + return dpagemap;
> > + }
> > +
> > + if (!dpagemap) {
> > + spin_unlock(&cache->lock);
> > + return NULL;
> > + }
> > +
> > + if (!try_wait_for_completion(&cache->queued)) {
> > + spin_unlock(&cache->lock);
> > + err = wait_for_completion_interruptible(&cache-
> > >queued);
> > + if (err)
> > + return ERR_PTR(err);
> > + goto retry;
> > + }
> > +
> > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > + cache->dpagemap = NULL;
> > + spin_unlock(&cache->lock);
> > + err = drm_pagemap_reinit(dpagemap);
> > + if (err) {
> > + drm_pagemap_destroy(dpagemap, false);
> > + return ERR_PTR(err);
> > + }
> > + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> > + } else {
> > + cache->dpagemap = NULL;
> > + spin_unlock(&cache->lock);
> > + dpagemap = NULL;
> > + }
> > +
> > + return dpagemap;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> > +
> > +/**
> > + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a
> > drm_pagemap_cache
> > + * @cache: The cache to assign the drm_pagemap to.
> > + * @dpagemap: The drm_pagemap to assign.
> > + *
> > + * The function must be called to populate a drm_pagemap_cache
> > only
> > + * after a call to drm_pagemap_get_from_cache() returns NULL.
> > + */
> > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > *cache, struct drm_pagemap *dpagemap)
> > +{
> > + struct drm_device *drm = dpagemap->drm;
> > +
> > + lockdep_assert_held(&cache->lookup_mutex);
> > + spin_lock(&cache->lock);
> > + dpagemap->cache = cache;
> > + swap(cache->dpagemap, dpagemap);
> > + reinit_completion(&cache->queued);
> > + spin_unlock(&cache->lock);
> > + drm_WARN_ON(drm, !!dpagemap);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> > +
> > +/**
> > + * drm_pagemap_get_from_cache_if_active() - Quick lookup of active
> > drm_pagemaps
> > + * @cache: The cache to lookup from.
> > + *
> > + * Function that should be used to lookup a drm_pagemap that is
> > already active.
> > + * (refcount > 0).
> > + *
> > + * Return: A pointer to the cache's drm_pagemap if it's active;
> > %NULL otherwise.
> > + */
> > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > drm_pagemap_cache *cache)
> > +{
> > + struct drm_pagemap *dpagemap;
> > +
> > + spin_lock(&cache->lock);
> > + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> > + spin_unlock(&cache->lock);
> > +
> > + return dpagemap;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> > +
> > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > *dpagemap)
> > +{
> > + struct drm_pagemap_cache *cache = dpagemap->cache;
> > + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> > +
> > + spin_lock(&shrinker->lock);
> > + if (list_empty(&dpagemap->shrink_link)) {
> > + spin_unlock(&shrinker->lock);
> > + return false;
> > + }
> > +
> > + list_del_init(&dpagemap->shrink_link);
> > + atomic_dec(&shrinker->num_dpagemaps);
> > + spin_unlock(&shrinker->lock);
> > + return true;
> > +}
> > +
> > +/**
> > + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker
> > list or destroy
> > + * @dpagemap: The drm_pagemap.
> > + *
> > + * If @dpagemap is associated with a &struct drm_pagemap_cache AND
> > the
> > + * struct device backing the drm device is still alive, add
> > @dpagemap to
> > + * the &struct drm_pagemap_shrinker list of shrinkable
> > drm_pagemaps.
> > + *
> > + * Otherwise destroy the pagemap directly using
> > drm_pagemap_destroy().
> > + *
> > + * This is an internal function which is not intended to be
> > exposed to drivers.
> > + */
> > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
>
> Not a full review - slowly wrapping my head around the first 6
> patches
> but one quick question.
>
> This is called from drm_pagemap_put. How do we know what type of
> context
> we're in? It seems like this could be called from either process
> context
> or atomic context (e.g., via drm_pagemap_zdd_destroy through
> drm_pagemap_page_free). This code doesn’t appear to work in atomic
> contexts—if I recall correctly, drm_dev_enter can’t be called from
> atomic context. Also, we're missing irqsave on the spinlock.
From reading up on srcu_read_lock(), which is hiding behind
drm_dev_enter(), it should be OK to call from atomic context as long as
it is also released from the same context. I indeed checked that we
could call it under a spinlock without getting any lockdep warnings.
The irqsave on the spinlock is a different thing, though. Do we know
that drm_pagemap_page_free() will be called from irq context?
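
If it can, then I figure the queuing side would need to look something
like the below (sketch only; drm_pagemap_shrinker_queue() is just an
invented name for the list handling currently open-coded in
drm_pagemap_shrinker_add()), and drm_pagemap_shrinker_cancel() and the
shrinker scan loop would need the same irqsave treatment:

static void drm_pagemap_shrinker_queue(struct drm_pagemap_shrinker *shrinker,
				       struct drm_pagemap *dpagemap)
{
	unsigned long flags;

	spin_lock_irqsave(&shrinker->lock, flags);
	list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
	atomic_inc(&shrinker->num_dpagemaps);
	spin_unlock_irqrestore(&shrinker->lock, flags);
}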
/Thomas
>
> We had a worker for ZDD destroy at one point—should we revive that?
> If
> we did, I think we could safely enforce a rule that drm_pagemap
> operations must only be called from process context.
>
> Matt
>
> > +{
> > + struct drm_pagemap_cache *cache;
> > + struct drm_pagemap_shrinker *shrinker;
> > + int idx;
> > +
> > + /*
> > + * The pagemap cache and shrinker are disabled at
> > + * pci device remove time. After that, dpagemaps
> > + * are freed directly.
> > + */
> > + if (!drm_dev_enter(dpagemap->drm, &idx))
> > + goto out_no_cache;
> > +
> > + cache = dpagemap->cache;
> > + if (!cache) {
> > + drm_dev_exit(idx);
> > + goto out_no_cache;
> > + }
> > +
> > + shrinker = cache->shrinker;
> > + spin_lock(&shrinker->lock);
> > + list_add_tail(&dpagemap->shrink_link, &shrinker-
> > >dpagemaps);
> > + atomic_inc(&shrinker->num_dpagemaps);
> > + spin_unlock(&shrinker->lock);
> > + complete_all(&cache->queued);
> > + drm_dev_exit(idx);
> > + return;
> > +
> > +out_no_cache:
> > + drm_pagemap_destroy(dpagemap, true);
> > +}
> > +
> > +static unsigned long
> > +drm_pagemap_shrinker_count(struct shrinker *shrink, struct
> > shrink_control *sc)
> > +{
> > + struct drm_pagemap_shrinker *shrinker = shrink-
> > >private_data;
> > + unsigned long count = atomic_read(&shrinker-
> > >num_dpagemaps);
> > +
> > + return count ? : SHRINK_EMPTY;
> > +}
> > +
> > +static unsigned long
> > +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct
> > shrink_control *sc)
> > +{
> > + struct drm_pagemap_shrinker *shrinker = shrink-
> > >private_data;
> > + struct drm_pagemap *dpagemap;
> > + struct drm_pagemap_cache *cache;
> > + unsigned long nr_freed = 0;
> > +
> > + sc->nr_scanned = 0;
> > + spin_lock(&shrinker->lock);
> > + do {
> > + dpagemap = list_first_entry_or_null(&shrinker-
> > >dpagemaps, typeof(*dpagemap),
> > + shrink_link);
> > + if (!dpagemap)
> > + break;
> > +
> > + atomic_dec(&shrinker->num_dpagemaps);
> > + list_del_init(&dpagemap->shrink_link);
> > + spin_unlock(&shrinker->lock);
> > +
> > + sc->nr_scanned++;
> > + nr_freed++;
> > +
> > + cache = dpagemap->cache;
> > + spin_lock(&cache->lock);
> > + cache->dpagemap = NULL;
> > + spin_unlock(&cache->lock);
> > +
> > + drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n",
> > dpagemap);
> > + drm_pagemap_destroy(dpagemap, true);
> > + spin_lock(&shrinker->lock);
> > + } while (sc->nr_scanned < sc->nr_to_scan);
> > + spin_unlock(&shrinker->lock);
> > +
> > + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> > +}
> > +
> > +static void drm_pagemap_shrinker_fini(void *arg)
> > +{
> > + struct drm_pagemap_shrinker *shrinker = arg;
> > +
> > + drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
> > + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker-
> > >num_dpagemaps));
> > + shrinker_free(shrinker->shrink);
> > + kfree(shrinker);
> > +}
> > +
> > +/**
> > + * drm_pagemap_shrinker_create_devm() - Create and register a
> > pagemap shrinker
> > + * @drm: The drm device
> > + *
> > + * Create and register a pagemap shrinker that shrinks unused
> > pagemaps
> > + * and thereby reduces memory footprint.
> > + * The shrinker is drm_device managed and unregisters itself when
> > + * the drm device is removed.
> > + *
> > + * Return: Pointer to a struct drm_pagemap_shrinker on success. Error pointer on failure.
> > + */
> > +struct drm_pagemap_shrinker
> > *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> > +{
> > + struct drm_pagemap_shrinker *shrinker;
> > + struct shrinker *shrink;
> > + int err;
> > +
> > + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> > + if (!shrinker)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm-
> > >unique);
> > + if (!shrink) {
> > + kfree(shrinker);
> > + return ERR_PTR(-ENOMEM);
> > + }
> > +
> > + spin_lock_init(&shrinker->lock);
> > + INIT_LIST_HEAD(&shrinker->dpagemaps);
> > + shrinker->drm = drm;
> > + shrinker->shrink = shrink;
> > + shrink->count_objects = drm_pagemap_shrinker_count;
> > + shrink->scan_objects = drm_pagemap_shrinker_scan;
> > + shrink->private_data = shrinker;
> > + shrinker_register(shrink);
> > +
> > + err = devm_add_action_or_reset(drm->dev,
> > drm_pagemap_shrinker_fini, shrinker);
> > + if (err)
> > + return ERR_PTR(err);
> > +
> > + return shrinker;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> > diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> > index 5cfe54331ba7..4b9af5e785c6 100644
> > --- a/include/drm/drm_pagemap.h
> > +++ b/include/drm/drm_pagemap.h
> > @@ -9,6 +9,7 @@
> > #define NR_PAGES(order) (1U << (order))
> >
> > struct drm_pagemap;
> > +struct drm_pagemap_cache;
> > struct drm_pagemap_dev_hold;
> > struct drm_pagemap_zdd;
> > struct device;
> > @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> > unsigned long start, unsigned long end,
> > struct mm_struct *mm,
> > unsigned long timeslice_ms);
> > + /**
> > + * @destroy: Destroy the drm_pagemap and associated
> > resources.
> > + * @dpagemap: The drm_pagemap to destroy.
> > + * @is_atomic_or_reclaim: The function may be called from
> > + * atomic- or reclaim context.
> > + *
> > + * The implementation should take care not to attempt to
> > + * destroy resources that may already have been destroyed
> > + * using devm_ callbacks, since this function may be
> > called
> > + * after the underlying struct device has been unbound.
> > + * If the implementation defers the execution to a work
> > item
> > + * to avoid locking issues, then it must make sure the
> > work
> > + * items are flushed before module exit. If the destroy
> > call
> > + * happens after the provider's pci_remove() callback has
> > + * been executed, a module reference and drm device
> > reference is
> > + * held across the destroy callback.
> > + */
> > + void (*destroy)(struct drm_pagemap *dpagemap,
> > + bool is_atomic_or_reclaim);
> > };
> >
> > /**
> > @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> > * @pagemap: Pointer to the underlying dev_pagemap.
> > * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> > * device referencing.
> > + * @cache: Back-pointer to the &struct drm_pagemap_cache used for
> > this
> > + * &struct drm_pagemap. May be NULL if no cache is used.
> > + * @shrink_link: Link into the shrinker's list of drm_pagemaps.
> > Only
> > + * used if also using a pagemap cache.
> > */
> > struct drm_pagemap {
> > const struct drm_pagemap_ops *ops;
> > @@ -142,6 +166,8 @@ struct drm_pagemap {
> > struct drm_device *drm;
> > struct dev_pagemap *pagemap;
> > struct drm_pagemap_dev_hold *dev_hold;
> > + struct drm_pagemap_cache *cache;
> > + struct list_head shrink_link;
> > };
> >
> > struct drm_pagemap_devmem;
> > @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> > unsigned long npages);
> > };
> >
> > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > + struct dev_pagemap *pagemap,
> > + struct drm_device *drm,
> > + const struct drm_pagemap_ops *ops);
> > +
> > struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> > struct dev_pagemap
> > *pagemap,
> > const struct
> > drm_pagemap_ops *ops);
> > @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct
> > drm_pagemap *dpagemap)
> >
> > /**
> > * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> > - * @dpagemap: Pointer to the struct drm_pagemap.
> > + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> > *
> > - * Return: Pointer to the struct drm_pagemap.
> > + * Return: Pointer to the struct drm_pagemap, or NULL.
> > */
> > static inline struct drm_pagemap *
> > drm_pagemap_get(struct drm_pagemap *dpagemap)
> > @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap *dpagemap)
> > return dpagemap;
> > }
> >
> > +/**
> > + * drm_pagemap_get_unless_zero() - Obtain a reference on a struct
> > drm_pagemap
> > + * unless the current reference count is zero.
> > + * @dpagemap: Pointer to the drm_pagemap or NULL.
> > + *
> > + * Return: A pointer to @dpagemap if the reference count was
> > successfully
> > + * incremented. NULL if @dpagemap was NULL, or its refcount was 0.
> > + */
> > +static inline struct drm_pagemap * __must_check
> > +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> > +{
> > + return (dpagemap && kref_get_unless_zero(&dpagemap->ref))
> > ? dpagemap : NULL;
> > +}
> > +
> > /**
> > * struct drm_pagemap_devmem - Structure representing a GPU SVM
> > device memory allocation
> > *
> > @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap
> > *dpagemap,
> > struct mm_struct *mm,
> > unsigned long timeslice_ms);
> >
> > -#endif
> > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > is_atomic_or_reclaim);
> >
> > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> > +#endif
> > diff --git a/include/drm/drm_pagemap_util.h
> > b/include/drm/drm_pagemap_util.h
> > new file mode 100644
> > index 000000000000..292244d429ee
> > --- /dev/null
> > +++ b/include/drm/drm_pagemap_util.h
> > @@ -0,0 +1,25 @@
> > +/* SPDX-License-Identifier: MIT */
> > +#ifndef _DRM_PAGEMAP_UTIL_H_
> > +#define _DRM_PAGEMAP_UTIL_H_
> > +
> > +struct drm_device;
> > +struct drm_pagemap;
> > +struct drm_pagemap_cache;
> > +struct drm_pagemap_shrinker;
> > +
> > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> > +
> > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > *cache);
> > +
> > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > *cache);
> > +
> > +struct drm_pagemap_shrinker
> > *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> > +
> > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > drm_pagemap_shrinker *shrinker);
> > +
> > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > drm_pagemap_cache *cache);
> > +
> > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > *cache, struct drm_pagemap *dpagemap);
> > +
> > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > drm_pagemap_cache *cache);
> > +#endif
> > --
> > 2.51.0
> >
* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-28 9:46 ` Thomas Hellström
@ 2025-10-28 10:29 ` Thomas Hellström
2025-10-28 18:38 ` Matthew Brost
0 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-28 10:29 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 10:46 +0100, Thomas Hellström wrote:
> On Mon, 2025-10-27 at 18:23 -0700, Matthew Brost wrote:
> > On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> > > Pagemaps are costly to set up and tear down, and they consume a
> > > lot
> > > of system memory for the struct pages. Ideally they should be
> > > created only when needed.
> > >
> > > Add a caching mechanism to allow doing just that: Create the
> > > drm_pagemaps
> > > when needed for migration. Keep them around to avoid destruction
> > > and
> > > re-creation latencies and destroy inactive/unused drm_pagemaps on
> > > memory
> > > pressure using a shrinker.
> > >
> > > Only add the helper functions. They will be hooked up to the xe
> > > driver
> > > in the upcoming patch.
> > >
> > > Signed-off-by: Thomas Hellström
> > > <thomas.hellstrom@linux.intel.com>
> > > ---
> > > drivers/gpu/drm/Makefile | 3 +-
> > > drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> > > drivers/gpu/drm/drm_pagemap_util.c | 426
> > > +++++++++++++++++++++++++++++
> > > include/drm/drm_pagemap.h | 53 +++-
> > > include/drm/drm_pagemap_util.h | 25 ++
> > > 5 files changed, 569 insertions(+), 17 deletions(-)
> > > create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> > > create mode 100644 include/drm/drm_pagemap_util.h
> > >
> > > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > > index c2672f369aed..cdca68fd9f23 100644
> > > --- a/drivers/gpu/drm/Makefile
> > > +++ b/drivers/gpu/drm/Makefile
> > > @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
> > >
> > > drm_gpusvm_helper-y := \
> > > drm_gpusvm.o\
> > > - drm_pagemap.o
> > > + drm_pagemap.o\
> > > + drm_pagemap_util.o
> > > obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
> > >
> > > obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> > > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > > b/drivers/gpu/drm/drm_pagemap.c
> > > index fb18a80d6a1c..5ca5b2b53bc1 100644
> > > --- a/drivers/gpu/drm/drm_pagemap.c
> > > +++ b/drivers/gpu/drm/drm_pagemap.c
> > > @@ -8,6 +8,7 @@
> > > #include <linux/pagemap.h>
> > > #include <drm/drm_drv.h>
> > > #include <drm/drm_pagemap.h>
> > > +#include <drm/drm_pagemap_util.h>
> > > #include <drm/drm_print.h>
> > >
> > > /**
> > > @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref
> > > *ref)
> > > * pagemap provider drm_device and its module.
> > > */
> > > dpagemap->dev_hold = NULL;
> > > - kfree(dpagemap);
> > > + drm_pagemap_shrinker_add(dpagemap);
> > > llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> > > schedule_work(&drm_pagemap_work);
> > > /*
> > > @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap
> > > *dpagemap)
> > > return dev_hold;
> > > }
> > >
> > > +/**
> > > + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> > > + * @dpagemap: The drm_pagemap to reinitialize
> > > + *
> > > + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> > > + * has already been called. This interface is intended for the
> > > + * situation where the driver caches a destroyed drm_pagemap.
> > > + *
> > > + * Return: 0 on success, negative error code on failure.
> > > + */
> > > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> > > +{
> > > + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> > > + if (IS_ERR(dpagemap->dev_hold))
> > > + return PTR_ERR(dpagemap->dev_hold);
> > > +
> > > + kref_init(&dpagemap->ref);
> > > + return 0;
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_reinit);
> > > +
> > > +/**
> > > + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> > > + * @dpagemap: The drm_pagemap to initialize.
> > > + * @pagemap: The associated dev_pagemap providing the device
> > > + * private pages.
> > > + * @drm: The drm device. The drm_pagemap holds a reference on
> > > the
> > > + * drm_device and the module owning the drm_device until
> > > + * drm_pagemap_release(). This facilitates drm_pagemap
> > > exporting.
> > > + * @ops: The drm_pagemap ops.
> > > + *
> > > + * Initialize and take an initial reference on a drm_pagemap.
> > > + * After successful return, use drm_pagemap_put() to destroy.
> > > + *
> > > + * Return: 0 on success, negative error code on failure.
> > > + */
> > > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > > + struct dev_pagemap *pagemap,
> > > + struct drm_device *drm,
> > > + const struct drm_pagemap_ops *ops)
> > > +{
> > > + kref_init(&dpagemap->ref);
> > > + dpagemap->ops = ops;
> > > + dpagemap->pagemap = pagemap;
> > > + dpagemap->drm = drm;
> > > + dpagemap->cache = NULL;
> > > + INIT_LIST_HEAD(&dpagemap->shrink_link);
> > > +
> > > + return drm_pagemap_reinit(dpagemap);
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_init);
> > > +
> > > /**
> > > * drm_pagemap_create() - Create a struct drm_pagemap.
> > > * @drm: Pointer to a struct drm_device providing the device-
> > > private memory.
> > > @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> > > const struct drm_pagemap_ops *ops)
> > > {
> > > struct drm_pagemap *dpagemap =
> > > kzalloc(sizeof(*dpagemap),
> > > GFP_KERNEL);
> > > - struct drm_pagemap_dev_hold *dev_hold;
> > > + int err;
> > >
> > > if (!dpagemap)
> > > return ERR_PTR(-ENOMEM);
> > >
> > > - kref_init(&dpagemap->ref);
> > > - dpagemap->drm = drm;
> > > - dpagemap->ops = ops;
> > > - dpagemap->pagemap = pagemap;
> > > -
> > > - dev_hold = drm_pagemap_dev_hold(dpagemap);
> > > - if (IS_ERR(dev_hold)) {
> > > - kfree(dpagemap);
> > > - return ERR_CAST(dev_hold);
> > > - }
> > > - dpagemap->dev_hold = dev_hold;
> > > + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> > > + if (err)
> > > + return ERR_PTR(err);
> > >
> > > return dpagemap;
> > > }
> > > @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct
> > > drm_pagemap *dpagemap,
> > > }
> > > EXPORT_SYMBOL(drm_pagemap_populate_mm);
> > >
> > > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > > is_atomic_or_reclaim)
> > > +{
> > > + if (dpagemap->ops->destroy)
> > > + dpagemap->ops->destroy(dpagemap,
> > > is_atomic_or_reclaim);
> > > + else
> > > + kfree(dpagemap);
> > > +}
> > > +
> > > static void drm_pagemap_exit(void)
> > > {
> > > flush_work(&drm_pagemap_work);
> > > diff --git a/drivers/gpu/drm/drm_pagemap_util.c
> > > b/drivers/gpu/drm/drm_pagemap_util.c
> > > new file mode 100644
> > > index 000000000000..e1a1d6bf25f4
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/drm_pagemap_util.c
> > > @@ -0,0 +1,426 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> > > +/*
> > > + * Copyright © 2025 Intel Corporation
> > > + */
> > > +
> > > +#include <drm/drm_drv.h>
> > > +#include <drm/drm_managed.h>
> > > +#include <drm/drm_pagemap.h>
> > > +#include <drm/drm_pagemap_util.h>
> > > +#include <drm/drm_print.h>
> > > +
> > > +/**
> > > + * struct drm_pagemap_cache - Lookup structure for pagemaps
> > > + *
> > > + * Structure to keep track of active (refcount > 1) and inactive
> > > + * (refcount == 0) pagemaps. Inactive pagemaps can be made
> > > active
> > > + * again by waiting for the @queued completion (indicating that
> > > the
> > > + * pagemap has been put on the @shrinker's list of shrinkable
> > > + * pagemaps, and then successfully removing it from @shrinker's
> > > + * list. The latter may fail if the shrinker is already in the
> > > + * process of freeing the pagemap. A struct drm_pagemap_cache
> > > can
> > > + * hold a single struct drm_pagemap.
> > > + */
> > > +struct drm_pagemap_cache {
> > > + /** @lookup_mutex: Mutex making the lookup process
> > > atomic
> > > */
> > > + struct mutex lookup_mutex;
> > > + /** @lock: Lock protecting the @dpagemap pointer */
> > > + spinlock_t lock;
> > > + /** @shrinker: Pointer to the shrinker used for this
> > > cache. Immutable. */
> > > + struct drm_pagemap_shrinker *shrinker;
> > > + /** @dpagemap: Non-refcounted pointer to the drm_pagemap
> > > */
> > > + struct drm_pagemap *dpagemap;
> > > + /**
> > > + * @queued: Signals when an inactive drm_pagemap has
> > > been
> > > put on
> > > + * @shrinker's list.
> > > + */
> > > + struct completion queued;
> > > +};
> > > +
> > > +/**
> > > + * struct drm_pagemap_shrinker - Shrinker to remove unused
> > > pagemaps
> > > + */
> > > +struct drm_pagemap_shrinker {
> > > + /** @drm: Pointer to the drm device. */
> > > + struct drm_device *drm;
> > > + /** @lock: Spinlock to protect the @dpagemaps list. */
> > > + spinlock_t lock;
> > > + /** @dpagemaps: List of unused dpagemaps. */
> > > + struct list_head dpagemaps;
> > > + /** @num_dpagemaps: Number of unused dpagemaps in
> > > @dpagemaps. */
> > > + atomic_t num_dpagemaps;
> > > + /** @shrink: Pointer to the struct shrinker. */
> > > + struct shrinker *shrink;
> > > +};
> > > +
> > > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > > *dpagemap);
> > > +
> > > +static void drm_pagemap_cache_fini(void *arg)
> > > +{
> > > + struct drm_pagemap_cache *cache = arg;
> > > + struct drm_pagemap *dpagemap;
> > > +
> > > + drm_dbg(cache->shrinker->drm, "Destroying dpagemap
> > > cache.\n");
> > > + spin_lock(&cache->lock);
> > > + dpagemap = cache->dpagemap;
> > > + if (!dpagemap) {
> > > + spin_unlock(&cache->lock);
> > > + goto out;
> > > + }
> > > +
> > > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > > + cache->dpagemap = NULL;
> > > + spin_unlock(&cache->lock);
> > > + drm_pagemap_destroy(dpagemap, false);
> > > + }
> > > +
> > > +out:
> > > + mutex_destroy(&cache->lookup_mutex);
> > > + kfree(cache);
> > > +}
> > > +
> > > +/**
> > > + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> > > + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> > > + *
> > > + * Create a device-managed drm_pagemap cache. The cache is
> > > automatically
> > > + * destroyed on struct device removal, at which point any
> > > *inactive*
> > > + * drm_pagemap's are destroyed.
> > > + *
> > > + * Return: Pointer to a struct drm_pagemap_cache on success.
> > > Error
> > > pointer
> > > + * on failure.
> > > + */
> > > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > > drm_pagemap_shrinker *shrinker)
> > > +{
> > > + struct drm_pagemap_cache *cache =
> > > kzalloc(sizeof(*cache),
> > > GFP_KERNEL);
> > > + int err;
> > > +
> > > + if (!cache)
> > > + return ERR_PTR(-ENOMEM);
> > > +
> > > + mutex_init(&cache->lookup_mutex);
> > > + spin_lock_init(&cache->lock);
> > > + cache->shrinker = shrinker;
> > > + init_completion(&cache->queued);
> > > + err = devm_add_action_or_reset(shrinker->drm->dev,
> > > drm_pagemap_cache_fini, cache);
> > > + if (err)
> > > + return ERR_PTR(err);
> > > +
> > > + return cache;
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> > > +
> > > +/**
> > > + * DOC: Cache lookup
> > > + *
> > > + * Cache lookup should be done under a locked mutex, so that a
> > > + * failed drm_pagemap_get_from_cache() and a following
> > > + * drm_pagemap_cache_setpagemap() are carried out as an atomic
> > > + * operation WRT other lookups. Otherwise, racing lookups may
> > > + * unnecessarily concurrently create pagemaps to fulfill a
> > > + * failed lookup. The API provides two functions to perform this
> > > lock,
> > > + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and
> > > they
> > > + * should be used in the following way:
> > > + *
> > > + * .. code-block:: c
> > > + *
> > > + * drm_pagemap_lock_lookup(cache);
> > > + * dpagemap = drm_pagemap_get_from_cache(cache);
> > > + * if (dpagemap)
> > > + * goto out_unlock;
> > > + *
> > > + * dpagemap = driver_create_new_dpagemap();
> > > + * if (!IS_ERR(dpagemap))
> > > + * drm_pagemap_cache_set_pagemap(cache,
> > > dpagemap);
> > > + *
> > > + * out_unlock:
> > > + * drm_pagemap_unlock_lookup(cache);
> > > + */
> > > +
> > > +/**
> > > + * drm_pagemap_cache_lock_lookup() Lock a drm_pagemap_cache for
> > > lookup
> > > + * @cache: The drm_pagemap_cache to lock.
> > > + *
> > > + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> > > + */
> > > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > > *cache)
> > > +{
> > > + return mutex_lock_interruptible(&cache->lookup_mutex);
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> > > +
> > > +/**
> > > + * drm_pagemap_cache_unlock_lookup() Unlock a drm_pagemap_cache
> > > after lookup
> > > + * @cache: The drm_pagemap_cache to unlock.
> > > + */
> > > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > > *cache)
> > > +{
> > > + mutex_unlock(&cache->lookup_mutex);
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> > > +
> > > +/**
> > > + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
> > > + * @cache: The cache used for lookup.
> > > + *
> > > + * If an active pagemap is present in the cache, it is
> > > immediately
> > > returned.
> > > + * If an inactive pagemap is present, it's removed from the
> > > shrinker list and
> > > + * an attempt is made to make it active.
> > > + * If no pagemap present or the attempt to make it active
> > > failed,
> > > %NULL is returned
> > > + * to indicate to the caller to create a new drm_pagemap and
> > > insert it into
> > > + * the cache.
> > > + *
> > > + * Return: A reference-counted pointer to a drm_pagemap if
> > > successful. An error
> > > + * pointer if an error occurred, or %NULL if no drm_pagemap was
> > > found and
> > > + * the caller should insert a new one.
> > > + */
> > > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > > drm_pagemap_cache *cache)
> > > +{
> > > + struct drm_pagemap *dpagemap;
> > > + int err;
> > > +
> > > + lockdep_assert_held(&cache->lookup_mutex);
> > > +retry:
> > > + spin_lock(&cache->lock);
> > > + dpagemap = cache->dpagemap;
> > > + if (drm_pagemap_get_unless_zero(dpagemap)) {
> > > + spin_unlock(&cache->lock);
> > > + return dpagemap;
> > > + }
> > > +
> > > + if (!dpagemap) {
> > > + spin_unlock(&cache->lock);
> > > + return NULL;
> > > + }
> > > +
> > > + if (!try_wait_for_completion(&cache->queued)) {
> > > + spin_unlock(&cache->lock);
> > > + err = wait_for_completion_interruptible(&cache-
> > > > queued);
> > > + if (err)
> > > + return ERR_PTR(err);
> > > + goto retry;
> > > + }
> > > +
> > > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > > + cache->dpagemap = NULL;
> > > + spin_unlock(&cache->lock);
> > > + err = drm_pagemap_reinit(dpagemap);
> > > + if (err) {
> > > + drm_pagemap_destroy(dpagemap, false);
> > > + return ERR_PTR(err);
> > > + }
> > > + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> > > + } else {
> > > + cache->dpagemap = NULL;
> > > + spin_unlock(&cache->lock);
> > > + dpagemap = NULL;
> > > + }
> > > +
> > > + return dpagemap;
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> > > +
> > > +/**
> > > + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a
> > > drm_pagemap_cache
> > > + * @cache: The cache to assign the drm_pagemap to.
> > > + * @dpagemap: The drm_pagemap to assign.
> > > + *
> > > + * The function must be called to populate a drm_pagemap_cache
> > > only
> > > + * after a call to drm_pagemap_get_from_cache() returns NULL.
> > > + */
> > > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > > *cache, struct drm_pagemap *dpagemap)
> > > +{
> > > + struct drm_device *drm = dpagemap->drm;
> > > +
> > > + lockdep_assert_held(&cache->lookup_mutex);
> > > + spin_lock(&cache->lock);
> > > + dpagemap->cache = cache;
> > > + swap(cache->dpagemap, dpagemap);
> > > + reinit_completion(&cache->queued);
> > > + spin_unlock(&cache->lock);
> > > + drm_WARN_ON(drm, !!dpagemap);
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> > > +
> > > +/**
> > > + * drm_pagemap_get_from_cache_if_active() - Quick lookup of
> > > active
> > > drm_pagemaps
> > > + * @cache: The cache to lookup from.
> > > + *
> > > + * Function that should be used to lookup a drm_pagemap that is
> > > already active.
> > > + * (refcount > 0).
> > > + *
> > > + * Return: A pointer to the cache's drm_pagemap if it's active;
> > > %NULL otherwise.
> > > + */
> > > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > > drm_pagemap_cache *cache)
> > > +{
> > > + struct drm_pagemap *dpagemap;
> > > +
> > > + spin_lock(&cache->lock);
> > > + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> > > + spin_unlock(&cache->lock);
> > > +
> > > + return dpagemap;
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> > > +
> > > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > > *dpagemap)
> > > +{
> > > + struct drm_pagemap_cache *cache = dpagemap->cache;
> > > + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> > > +
> > > + spin_lock(&shrinker->lock);
> > > + if (list_empty(&dpagemap->shrink_link)) {
> > > + spin_unlock(&shrinker->lock);
> > > + return false;
> > > + }
> > > +
> > > + list_del_init(&dpagemap->shrink_link);
> > > + atomic_dec(&shrinker->num_dpagemaps);
> > > + spin_unlock(&shrinker->lock);
> > > + return true;
> > > +}
> > > +
> > > +/**
> > > + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the
> > > shrinker
> > > list or destroy
> > > + * @dpagemap: The drm_pagemap.
> > > + *
> > > + * If @dpagemap is associated with a &struct drm_pagemap_cache
> > > AND
> > > the
> > > + * struct device backing the drm device is still alive, add
> > > @dpagemap to
> > > + * the &struct drm_pagemap_shrinker list of shrinkable
> > > drm_pagemaps.
> > > + *
> > > + * Otherwise destroy the pagemap directly using
> > > drm_pagemap_destroy().
> > > + *
> > > + * This is an internal function which is not intended to be
> > > exposed to drivers.
> > > + */
> > > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
> >
> > Not a full review - slowly wrapping my head around the first 6
> > patches
> > but one quick question.
> >
> > This is called from drm_pagemap_put. How do we know what type of
> > context
> > we're in? It seems like this could be called from either process
> > context
> > or atomic context (e.g., via drm_pagemap_zdd_destroy through
> > drm_pagemap_page_free). This code doesn’t appear to work in atomic
> > contexts—if I recall correctly, drm_dev_enter can’t be called from
> > atomic context. Also, we're missing irqsave on the spinlock.
>
> From reading up on srcu_read_lock(), which is hiding behind
> drm_dev_enter(), it should be OK to call from atomic context as long
> as
> it is also released from the same context. I indeed checked that we
> could call it under a spinlock without getting any lockdep warnings.
>
> The irqsave on the spinlock is a different thing, though. Do we know
> that drm_pagemap_page_free() will be called from irq context?
Looks like the dmirror_devmem_free()
https://elixir.bootlin.com/linux/v6.18-rc3/source/lib/test_hmm.c#L1377
uses a spinlock without irqsave.
That said, I can add a drm_pagemap_shrinker_might_lock() to our
page_free() callback for CONFIG_PROVE_LOCKING.
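Something along these lines is what I have in mind. Rough sketch only:
the helper name and the exact place it gets called from the page_free()
path are made up at this point, and it would have to live in
drm_pagemap_util.c since it pokes at the private cache/shrinker structs:

#include <linux/lockdep.h>

/*
 * Teach lockdep that a drm_pagemap_put() from this context may end up
 * taking the shrinker spinlock, so CONFIG_PROVE_LOCKING flags an
 * irq-context caller even when the shrinker list happens to be empty.
 */
void drm_pagemap_shrinker_might_lock(struct drm_pagemap *dpagemap)
{
#ifdef CONFIG_PROVE_LOCKING
        struct drm_pagemap_cache *cache = dpagemap->cache;

        if (cache)
                might_lock(&cache->shrinker->lock);
#endif
}

The driver's page_free() callback would then call this helper before
doing the final put.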
/Thomas
>
> /Thomas
>
>
>
> >
> > We had a worker for ZDD destroy at one point—should we revive that?
> > If
> > we did, I think we could safely enforce a rule that drm_pagemap
> > operations must only be called from process context.
> >
> > Matt
> >
> > > +{
> > > + struct drm_pagemap_cache *cache;
> > > + struct drm_pagemap_shrinker *shrinker;
> > > + int idx;
> > > +
> > > + /*
> > > + * The pagemap cache and shrinker are disabled at
> > > + * pci device remove time. After that, dpagemaps
> > > + * are freed directly.
> > > + */
> > > + if (!drm_dev_enter(dpagemap->drm, &idx))
> > > + goto out_no_cache;
> > > +
> > > + cache = dpagemap->cache;
> > > + if (!cache) {
> > > + drm_dev_exit(idx);
> > > + goto out_no_cache;
> > > + }
> > > +
> > > + shrinker = cache->shrinker;
> > > + spin_lock(&shrinker->lock);
> > > + list_add_tail(&dpagemap->shrink_link, &shrinker-
> > > > dpagemaps);
> > > + atomic_inc(&shrinker->num_dpagemaps);
> > > + spin_unlock(&shrinker->lock);
> > > + complete_all(&cache->queued);
> > > + drm_dev_exit(idx);
> > > + return;
> > > +
> > > +out_no_cache:
> > > + drm_pagemap_destroy(dpagemap, true);
> > > +}
> > > +
> > > +static unsigned long
> > > +drm_pagemap_shrinker_count(struct shrinker *shrink, struct
> > > shrink_control *sc)
> > > +{
> > > + struct drm_pagemap_shrinker *shrinker = shrink-
> > > > private_data;
> > > + unsigned long count = atomic_read(&shrinker-
> > > > num_dpagemaps);
> > > +
> > > + return count ? : SHRINK_EMPTY;
> > > +}
> > > +
> > > +static unsigned long
> > > +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct
> > > shrink_control *sc)
> > > +{
> > > + struct drm_pagemap_shrinker *shrinker = shrink-
> > > > private_data;
> > > + struct drm_pagemap *dpagemap;
> > > + struct drm_pagemap_cache *cache;
> > > + unsigned long nr_freed = 0;
> > > +
> > > + sc->nr_scanned = 0;
> > > + spin_lock(&shrinker->lock);
> > > + do {
> > > + dpagemap = list_first_entry_or_null(&shrinker-
> > > > dpagemaps, typeof(*dpagemap),
> > > +
> > > shrink_link);
> > > + if (!dpagemap)
> > > + break;
> > > +
> > > + atomic_dec(&shrinker->num_dpagemaps);
> > > + list_del_init(&dpagemap->shrink_link);
> > > + spin_unlock(&shrinker->lock);
> > > +
> > > + sc->nr_scanned++;
> > > + nr_freed++;
> > > +
> > > + cache = dpagemap->cache;
> > > + spin_lock(&cache->lock);
> > > + cache->dpagemap = NULL;
> > > + spin_unlock(&cache->lock);
> > > +
> > > + drm_dbg(dpagemap->drm, "Shrinking dpagemap
> > > %p.\n",
> > > dpagemap);
> > > + drm_pagemap_destroy(dpagemap, true);
> > > + spin_lock(&shrinker->lock);
> > > + } while (sc->nr_scanned < sc->nr_to_scan);
> > > + spin_unlock(&shrinker->lock);
> > > +
> > > + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> > > +}
> > > +
> > > +static void drm_pagemap_shrinker_fini(void *arg)
> > > +{
> > > + struct drm_pagemap_shrinker *shrinker = arg;
> > > +
> > > + drm_dbg(shrinker->drm, "Destroying dpagemap
> > > shrinker.\n");
> > > + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker-
> > > > num_dpagemaps));
> > > + shrinker_free(shrinker->shrink);
> > > + kfree(shrinker);
> > > +}
> > > +
> > > +/**
> > > + * drm_pagemap_shrinker_create_devm() - Create and register a
> > > pagemap shrinker
> > > + * @drm: The drm device
> > > + *
> > > + * Create and register a pagemap shrinker that shrinks unused
> > > pagemaps
> > > + * and thereby reduces memory footprint.
> > > + * The shrinker is drm_device managed and unregisters itself
> > > when
> > > + * the drm device is removed.
> > > + *
> > > + * Return: %0 on success, negative error code on failure.
> > > + */
> > > +struct drm_pagemap_shrinker
> > > *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> > > +{
> > > + struct drm_pagemap_shrinker *shrinker;
> > > + struct shrinker *shrink;
> > > + int err;
> > > +
> > > + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> > > + if (!shrinker)
> > > + return ERR_PTR(-ENOMEM);
> > > +
> > > + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm-
> > > > unique);
> > > + if (!shrink) {
> > > + kfree(shrinker);
> > > + return ERR_PTR(-ENOMEM);
> > > + }
> > > +
> > > + spin_lock_init(&shrinker->lock);
> > > + INIT_LIST_HEAD(&shrinker->dpagemaps);
> > > + shrinker->drm = drm;
> > > + shrinker->shrink = shrink;
> > > + shrink->count_objects = drm_pagemap_shrinker_count;
> > > + shrink->scan_objects = drm_pagemap_shrinker_scan;
> > > + shrink->private_data = shrinker;
> > > + shrinker_register(shrink);
> > > +
> > > + err = devm_add_action_or_reset(drm->dev,
> > > drm_pagemap_shrinker_fini, shrinker);
> > > + if (err)
> > > + return ERR_PTR(err);
> > > +
> > > + return shrinker;
> > > +}
> > > +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> > > diff --git a/include/drm/drm_pagemap.h
> > > b/include/drm/drm_pagemap.h
> > > index 5cfe54331ba7..4b9af5e785c6 100644
> > > --- a/include/drm/drm_pagemap.h
> > > +++ b/include/drm/drm_pagemap.h
> > > @@ -9,6 +9,7 @@
> > > #define NR_PAGES(order) (1U << (order))
> > >
> > > struct drm_pagemap;
> > > +struct drm_pagemap_cache;
> > > struct drm_pagemap_dev_hold;
> > > struct drm_pagemap_zdd;
> > > struct device;
> > > @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> > > unsigned long start, unsigned long
> > > end,
> > > struct mm_struct *mm,
> > > unsigned long timeslice_ms);
> > > + /**
> > > + * @destroy: Destroy the drm_pagemap and associated
> > > resources.
> > > + * @dpagemap: The drm_pagemap to destroy.
> > > + * @is_atomic_or_reclaim: The function may be called
> > > from
> > > + * atomic- or reclaim context.
> > > + *
> > > + * The implementation should take care not to attempt to
> > > + * destroy resources that may already have been
> > > destroyed
> > > + * using devm_ callbacks, since this function may be
> > > called
> > > + * after the underlying struct device has been unbound.
> > > + * If the implementation defers the execution to a work
> > > item
> > > + * to avoid locking issues, then it must make sure the
> > > work
> > > + * items are flushed before module exit. If the destroy
> > > call
> > > + * happens after the provider's pci_remove() callback
> > > has
> > > + * been executed, a module reference and drm device
> > > reference is
> > > + * held across the destroy callback.
> > > + */
> > > + void (*destroy)(struct drm_pagemap *dpagemap,
> > > + bool is_atomic_or_reclaim);
> > > };
> > >
> > > /**
> > > @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> > > * @pagemap: Pointer to the underlying dev_pagemap.
> > > * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> > > * device referencing.
> > > + * @cache: Back-pointer to the &struct drm_pagemap_cache used
> > > for
> > > this
> > > + * &struct drm_pagemap. May be NULL if no cache is used.
> > > + * @shrink_link: Link into the shrinker's list of drm_pagemaps.
> > > Only
> > > + * used if also using a pagemap cache.
> > > */
> > > struct drm_pagemap {
> > > const struct drm_pagemap_ops *ops;
> > > @@ -142,6 +166,8 @@ struct drm_pagemap {
> > > struct drm_device *drm;
> > > struct dev_pagemap *pagemap;
> > > struct drm_pagemap_dev_hold *dev_hold;
> > > + struct drm_pagemap_cache *cache;
> > > + struct list_head shrink_link;
> > > };
> > >
> > > struct drm_pagemap_devmem;
> > > @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> > > unsigned long npages);
> > > };
> > >
> > > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > > + struct dev_pagemap *pagemap,
> > > + struct drm_device *drm,
> > > + const struct drm_pagemap_ops *ops);
> > > +
> > > struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> > > struct dev_pagemap
> > > *pagemap,
> > > const struct
> > > drm_pagemap_ops *ops);
> > > @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct
> > > drm_pagemap *dpagemap)
> > >
> > > /**
> > > * drm_pagemap_get() - Obtain a reference on a struct
> > > drm_pagemap
> > > - * @dpagemap: Pointer to the struct drm_pagemap.
> > > + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> > > *
> > > - * Return: Pointer to the struct drm_pagemap.
> > > + * Return: Pointer to the struct drm_pagemap, or NULL.
> > > */
> > > static inline struct drm_pagemap *
> > > drm_pagemap_get(struct drm_pagemap *dpagemap)
> > > @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap
> > > *dpagemap)
> > > return dpagemap;
> > > }
> > >
> > > +/**
> > > + * drm_pagemap_get_unless_zero() - Obtain a reference on a
> > > struct
> > > drm_pagemap
> > > + * unless the current reference count is zero.
> > > + * @dpagemap: Pointer to the drm_pagemap or NULL.
> > > + *
> > > + * Return: A pointer to @dpagemap if the reference count was
> > > successfully
> > > + * incremented. NULL if @dpagemap was NULL, or its refcount was
> > > 0.
> > > + */
> > > +static inline struct drm_pagemap * __must_check
> > > +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> > > +{
> > > + return (dpagemap && kref_get_unless_zero(&dpagemap-
> > > >ref))
> > > ? dpagemap : NULL;
> > > +}
> > > +
> > > /**
> > > * struct drm_pagemap_devmem - Structure representing a GPU SVM
> > > device memory allocation
> > > *
> > > @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct
> > > drm_pagemap
> > > *dpagemap,
> > > struct mm_struct *mm,
> > > unsigned long timeslice_ms);
> > >
> > > -#endif
> > > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > > is_atomic_or_reclaim);
> > >
> > > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> > > +#endif
> > > diff --git a/include/drm/drm_pagemap_util.h
> > > b/include/drm/drm_pagemap_util.h
> > > new file mode 100644
> > > index 000000000000..292244d429ee
> > > --- /dev/null
> > > +++ b/include/drm/drm_pagemap_util.h
> > > @@ -0,0 +1,25 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +#ifndef _DRM_PAGEMAP_UTIL_H_
> > > +#define _DRM_PAGEMAP_UTIL_H_
> > > +
> > > +struct drm_device;
> > > +struct drm_pagemap;
> > > +struct drm_pagemap_cache;
> > > +struct drm_pagemap_shrinker;
> > > +
> > > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> > > +
> > > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > > *cache);
> > > +
> > > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > > *cache);
> > > +
> > > +struct drm_pagemap_shrinker
> > > *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> > > +
> > > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > > drm_pagemap_shrinker *shrinker);
> > > +
> > > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > > drm_pagemap_cache *cache);
> > > +
> > > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > > *cache, struct drm_pagemap *dpagemap);
> > > +
> > > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > > drm_pagemap_cache *cache);
> > > +#endif
> > > --
> > > 2.51.0
> > >
>
^ permalink raw reply	[flat|nested] 49+ messages in thread
* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-28 10:29 ` Thomas Hellström
@ 2025-10-28 18:38 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 18:38 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, Oct 28, 2025 at 11:29:19AM +0100, Thomas Hellström wrote:
> On Tue, 2025-10-28 at 10:46 +0100, Thomas Hellström wrote:
> > On Mon, 2025-10-27 at 18:23 -0700, Matthew Brost wrote:
> > > On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> > > > Pagemaps are costly to set up and tear down, and they consume a
> > > > lot
> > > > of system memory for the struct pages. Ideally they should be
> > > > created only when needed.
> > > >
> > > > Add a caching mechanism to allow doing just that: Create the
> > > > drm_pagemaps
> > > > when needed for migration. Keep them around to avoid destruction
> > > > and
> > > > re-creation latencies and destroy inactive/unused drm_pagemaps on
> > > > memory
> > > > pressure using a shrinker.
> > > >
> > > > Only add the helper functions. They will be hooked up to the xe
> > > > driver
> > > > in the upcoming patch.
> > > >
> > > > Signed-off-by: Thomas Hellström
> > > > <thomas.hellstrom@linux.intel.com>
> > > > ---
> > > > drivers/gpu/drm/Makefile | 3 +-
> > > > drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> > > > drivers/gpu/drm/drm_pagemap_util.c | 426
> > > > +++++++++++++++++++++++++++++
> > > > include/drm/drm_pagemap.h | 53 +++-
> > > > include/drm/drm_pagemap_util.h | 25 ++
> > > > 5 files changed, 569 insertions(+), 17 deletions(-)
> > > > create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> > > > create mode 100644 include/drm/drm_pagemap_util.h
> > > >
> > > > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > > > index c2672f369aed..cdca68fd9f23 100644
> > > > --- a/drivers/gpu/drm/Makefile
> > > > +++ b/drivers/gpu/drm/Makefile
> > > > @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
> > > >
> > > > drm_gpusvm_helper-y := \
> > > > drm_gpusvm.o\
> > > > - drm_pagemap.o
> > > > + drm_pagemap.o\
> > > > + drm_pagemap_util.o
> > > > obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
> > > >
> > > > obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> > > > diff --git a/drivers/gpu/drm/drm_pagemap.c
> > > > b/drivers/gpu/drm/drm_pagemap.c
> > > > index fb18a80d6a1c..5ca5b2b53bc1 100644
> > > > --- a/drivers/gpu/drm/drm_pagemap.c
> > > > +++ b/drivers/gpu/drm/drm_pagemap.c
> > > > @@ -8,6 +8,7 @@
> > > > #include <linux/pagemap.h>
> > > > #include <drm/drm_drv.h>
> > > > #include <drm/drm_pagemap.h>
> > > > +#include <drm/drm_pagemap_util.h>
> > > > #include <drm/drm_print.h>
> > > >
> > > > /**
> > > > @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref
> > > > *ref)
> > > > * pagemap provider drm_device and its module.
> > > > */
> > > > dpagemap->dev_hold = NULL;
> > > > - kfree(dpagemap);
> > > > + drm_pagemap_shrinker_add(dpagemap);
> > > > llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> > > > schedule_work(&drm_pagemap_work);
> > > > /*
> > > > @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap
> > > > *dpagemap)
> > > > return dev_hold;
> > > > }
> > > >
> > > > +/**
> > > > + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> > > > + * @dpagemap: The drm_pagemap to reinitialize
> > > > + *
> > > > + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> > > > + * has already been called. This interface is intended for the
> > > > + * situation where the driver caches a destroyed drm_pagemap.
> > > > + *
> > > > + * Return: 0 on success, negative error code on failure.
> > > > + */
> > > > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> > > > +{
> > > > + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> > > > + if (IS_ERR(dpagemap->dev_hold))
> > > > + return PTR_ERR(dpagemap->dev_hold);
> > > > +
> > > > + kref_init(&dpagemap->ref);
> > > > + return 0;
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_reinit);
> > > > +
> > > > +/**
> > > > + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> > > > + * @dpagemap: The drm_pagemap to initialize.
> > > > + * @pagemap: The associated dev_pagemap providing the device
> > > > + * private pages.
> > > > + * @drm: The drm device. The drm_pagemap holds a reference on
> > > > the
> > > > + * drm_device and the module owning the drm_device until
> > > > + * drm_pagemap_release(). This facilitates drm_pagemap
> > > > exporting.
> > > > + * @ops: The drm_pagemap ops.
> > > > + *
> > > > + * Initialize and take an initial reference on a drm_pagemap.
> > > > + * After successful return, use drm_pagemap_put() to destroy.
> > > > + *
> > > > + ** Return: 0 on success, negative error code on error.
> > > > + */
> > > > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > > > + struct dev_pagemap *pagemap,
> > > > + struct drm_device *drm,
> > > > + const struct drm_pagemap_ops *ops)
> > > > +{
> > > > + kref_init(&dpagemap->ref);
> > > > + dpagemap->ops = ops;
> > > > + dpagemap->pagemap = pagemap;
> > > > + dpagemap->drm = drm;
> > > > + dpagemap->cache = NULL;
> > > > + INIT_LIST_HEAD(&dpagemap->shrink_link);
> > > > +
> > > > + return drm_pagemap_reinit(dpagemap);
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_init);
> > > > +
> > > > /**
> > > > * drm_pagemap_create() - Create a struct drm_pagemap.
> > > > * @drm: Pointer to a struct drm_device providing the device-
> > > > private memory.
> > > > @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> > > > const struct drm_pagemap_ops *ops)
> > > > {
> > > > struct drm_pagemap *dpagemap =
> > > > kzalloc(sizeof(*dpagemap),
> > > > GFP_KERNEL);
> > > > - struct drm_pagemap_dev_hold *dev_hold;
> > > > + int err;
> > > >
> > > > if (!dpagemap)
> > > > return ERR_PTR(-ENOMEM);
> > > >
> > > > - kref_init(&dpagemap->ref);
> > > > - dpagemap->drm = drm;
> > > > - dpagemap->ops = ops;
> > > > - dpagemap->pagemap = pagemap;
> > > > -
> > > > - dev_hold = drm_pagemap_dev_hold(dpagemap);
> > > > - if (IS_ERR(dev_hold)) {
> > > > - kfree(dpagemap);
> > > > - return ERR_CAST(dev_hold);
> > > > - }
> > > > - dpagemap->dev_hold = dev_hold;
> > > > + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> > > > + if (err)
> > > > + return ERR_PTR(err);
> > > >
> > > > return dpagemap;
> > > > }
> > > > @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct
> > > > drm_pagemap *dpagemap,
> > > > }
> > > > EXPORT_SYMBOL(drm_pagemap_populate_mm);
> > > >
> > > > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > > > is_atomic_or_reclaim)
> > > > +{
> > > > + if (dpagemap->ops->destroy)
> > > > + dpagemap->ops->destroy(dpagemap,
> > > > is_atomic_or_reclaim);
> > > > + else
> > > > + kfree(dpagemap);
> > > > +}
> > > > +
> > > > static void drm_pagemap_exit(void)
> > > > {
> > > > flush_work(&drm_pagemap_work);
> > > > diff --git a/drivers/gpu/drm/drm_pagemap_util.c
> > > > b/drivers/gpu/drm/drm_pagemap_util.c
> > > > new file mode 100644
> > > > index 000000000000..e1a1d6bf25f4
> > > > --- /dev/null
> > > > +++ b/drivers/gpu/drm/drm_pagemap_util.c
> > > > @@ -0,0 +1,426 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> > > > +/*
> > > > + * Copyright © 2025 Intel Corporation
> > > > + */
> > > > +
> > > > +#include <drm/drm_drv.h>
> > > > +#include <drm/drm_managed.h>
> > > > +#include <drm/drm_pagemap.h>
> > > > +#include <drm/drm_pagemap_util.h>
> > > > +#include <drm/drm_print.h>
> > > > +
> > > > +/**
> > > > + * struct drm_pagemap_cache - Lookup structure for pagemaps
> > > > + *
> > > > + * Structure to keep track of active (refcount > 1) and inactive
> > > > + * (refcount == 0) pagemaps. Inactive pagemaps can be made
> > > > active
> > > > + * again by waiting for the @queued completion (indicating that
> > > > the
> > > > + * pagemap has been put on the @shrinker's list of shrinkable
> > > > + * pagemaps, and then successfully removing it from @shrinker's
> > > > + * list. The latter may fail if the shrinker is already in the
> > > > + * process of freeing the pagemap. A struct drm_pagemap_cache
> > > > can
> > > > + * hold a single struct drm_pagemap.
> > > > + */
> > > > +struct drm_pagemap_cache {
> > > > + /** @lookup_mutex: Mutex making the lookup process
> > > > atomic
> > > > */
> > > > + struct mutex lookup_mutex;
> > > > + /** @lock: Lock protecting the @dpagemap pointer */
> > > > + spinlock_t lock;
> > > > + /** @shrinker: Pointer to the shrinker used for this
> > > > cache. Immutable. */
> > > > + struct drm_pagemap_shrinker *shrinker;
> > > > + /** @dpagemap: Non-refcounted pointer to the drm_pagemap
> > > > */
> > > > + struct drm_pagemap *dpagemap;
> > > > + /**
> > > > + * @queued: Signals when an inactive drm_pagemap has
> > > > been
> > > > put on
> > > > + * @shrinker's list.
> > > > + */
> > > > + struct completion queued;
> > > > +};
> > > > +
> > > > +/**
> > > > + * struct drm_pagemap_shrinker - Shrinker to remove unused
> > > > pagemaps
> > > > + */
> > > > +struct drm_pagemap_shrinker {
> > > > + /** @drm: Pointer to the drm device. */
> > > > + struct drm_device *drm;
> > > > + /** @lock: Spinlock to protect the @dpagemaps list. */
> > > > + spinlock_t lock;
> > > > + /** @dpagemaps: List of unused dpagemaps. */
> > > > + struct list_head dpagemaps;
> > > > + /** @num_dpagemaps: Number of unused dpagemaps in
> > > > @dpagemaps. */
> > > > + atomic_t num_dpagemaps;
> > > > + /** @shrink: Pointer to the struct shrinker. */
> > > > + struct shrinker *shrink;
> > > > +};
> > > > +
> > > > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > > > *dpagemap);
> > > > +
> > > > +static void drm_pagemap_cache_fini(void *arg)
> > > > +{
> > > > + struct drm_pagemap_cache *cache = arg;
> > > > + struct drm_pagemap *dpagemap;
> > > > +
> > > > + drm_dbg(cache->shrinker->drm, "Destroying dpagemap
> > > > cache.\n");
> > > > + spin_lock(&cache->lock);
> > > > + dpagemap = cache->dpagemap;
> > > > + if (!dpagemap) {
> > > > + spin_unlock(&cache->lock);
> > > > + goto out;
> > > > + }
> > > > +
> > > > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > > > + cache->dpagemap = NULL;
> > > > + spin_unlock(&cache->lock);
> > > > + drm_pagemap_destroy(dpagemap, false);
> > > > + }
> > > > +
> > > > +out:
> > > > + mutex_destroy(&cache->lookup_mutex);
> > > > + kfree(cache);
> > > > +}
> > > > +
> > > > +/**
> > > > + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> > > > + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> > > > + *
> > > > + * Create a device-managed drm_pagemap cache. The cache is
> > > > automatically
> > > > + * destroyed on struct device removal, at which point any
> > > > *inactive*
> > > > + * drm_pagemap's are destroyed.
> > > > + *
> > > > + * Return: Pointer to a struct drm_pagemap_cache on success.
> > > > Error
> > > > pointer
> > > > + * on failure.
> > > > + */
> > > > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > > > drm_pagemap_shrinker *shrinker)
> > > > +{
> > > > + struct drm_pagemap_cache *cache =
> > > > kzalloc(sizeof(*cache),
> > > > GFP_KERNEL);
> > > > + int err;
> > > > +
> > > > + if (!cache)
> > > > + return ERR_PTR(-ENOMEM);
> > > > +
> > > > + mutex_init(&cache->lookup_mutex);
> > > > + spin_lock_init(&cache->lock);
> > > > + cache->shrinker = shrinker;
> > > > + init_completion(&cache->queued);
> > > > + err = devm_add_action_or_reset(shrinker->drm->dev,
> > > > drm_pagemap_cache_fini, cache);
> > > > + if (err)
> > > > + return ERR_PTR(err);
> > > > +
> > > > + return cache;
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> > > > +
> > > > +/**
> > > > + * DOC: Cache lookup
> > > > + *
> > > > + * Cache lookup should be done under a locked mutex, so that a
> > > > + * failed drm_pagemap_get_from_cache() and a following
> > > > + * drm_pagemap_cache_setpagemap() are carried out as an atomic
> > > > + * operation WRT other lookups. Otherwise, racing lookups may
> > > > + * unnecessarily concurrently create pagemaps to fulfill a
> > > > + * failed lookup. The API provides two functions to perform this
> > > > lock,
> > > > + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and
> > > > they
> > > > + * should be used in the following way:
> > > > + *
> > > > + * .. code-block:: c
> > > > + *
> > > > + * drm_pagemap_lock_lookup(cache);
> > > > + * dpagemap = drm_pagemap_get_from_cache(cache);
> > > > + * if (dpagemap)
> > > > + * goto out_unlock;
> > > > + *
> > > > + * dpagemap = driver_create_new_dpagemap();
> > > > + * if (!IS_ERR(dpagemap))
> > > > + * drm_pagemap_cache_set_pagemap(cache,
> > > > dpagemap);
> > > > + *
> > > > + * out_unlock:
> > > > + * drm_pagemap_unlock_lookup(cache);
> > > > + */
> > > > +
> > > > +/**
> > > > + * drm_pagemap_cache_lock_lookup() Lock a drm_pagemap_cache for
> > > > lookup
> > > > + * @cache: The drm_pagemap_cache to lock.
> > > > + *
> > > > + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> > > > + */
> > > > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > > > *cache)
> > > > +{
> > > > + return mutex_lock_interruptible(&cache->lookup_mutex);
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> > > > +
> > > > +/**
> > > > + * drm_pagemap_cache_unlock_lookup() Unlock a drm_pagemap_cache
> > > > after lookup
> > > > + * @cache: The drm_pagemap_cache to unlock.
> > > > + */
> > > > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > > > *cache)
> > > > +{
> > > > + mutex_unlock(&cache->lookup_mutex);
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> > > > +
> > > > +/**
> > > > + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
> > > > + * @cache: The cache used for lookup.
> > > > + *
> > > > + * If an active pagemap is present in the cache, it is
> > > > immediately
> > > > returned.
> > > > + * If an inactive pagemap is present, it's removed from the
> > > > shrinker list and
> > > > + * an attempt is made to make it active.
> > > > + * If no pagemap present or the attempt to make it active
> > > > failed,
> > > > %NULL is returned
> > > > + * to indicate to the caller to create a new drm_pagemap and
> > > > insert it into
> > > > + * the cache.
> > > > + *
> > > > + * Return: A reference-counted pointer to a drm_pagemap if
> > > > successful. An error
> > > > + * pointer if an error occurred, or %NULL if no drm_pagemap was
> > > > found and
> > > > + * the caller should insert a new one.
> > > > + */
> > > > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > > > drm_pagemap_cache *cache)
> > > > +{
> > > > + struct drm_pagemap *dpagemap;
> > > > + int err;
> > > > +
> > > > + lockdep_assert_held(&cache->lookup_mutex);
> > > > +retry:
> > > > + spin_lock(&cache->lock);
> > > > + dpagemap = cache->dpagemap;
> > > > + if (drm_pagemap_get_unless_zero(dpagemap)) {
> > > > + spin_unlock(&cache->lock);
> > > > + return dpagemap;
> > > > + }
> > > > +
> > > > + if (!dpagemap) {
> > > > + spin_unlock(&cache->lock);
> > > > + return NULL;
> > > > + }
> > > > +
> > > > + if (!try_wait_for_completion(&cache->queued)) {
> > > > + spin_unlock(&cache->lock);
> > > > + err = wait_for_completion_interruptible(&cache-
> > > > > queued);
> > > > + if (err)
> > > > + return ERR_PTR(err);
> > > > + goto retry;
> > > > + }
> > > > +
> > > > + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> > > > + cache->dpagemap = NULL;
> > > > + spin_unlock(&cache->lock);
> > > > + err = drm_pagemap_reinit(dpagemap);
> > > > + if (err) {
> > > > + drm_pagemap_destroy(dpagemap, false);
> > > > + return ERR_PTR(err);
> > > > + }
> > > > + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> > > > + } else {
> > > > + cache->dpagemap = NULL;
> > > > + spin_unlock(&cache->lock);
> > > > + dpagemap = NULL;
> > > > + }
> > > > +
> > > > + return dpagemap;
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> > > > +
> > > > +/**
> > > > + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a
> > > > drm_pagemap_cache
> > > > + * @cache: The cache to assign the drm_pagemap to.
> > > > + * @dpagemap: The drm_pagemap to assign.
> > > > + *
> > > > + * The function must be called to populate a drm_pagemap_cache
> > > > only
> > > > + * after a call to drm_pagemap_get_from_cache() returns NULL.
> > > > + */
> > > > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > > > *cache, struct drm_pagemap *dpagemap)
> > > > +{
> > > > + struct drm_device *drm = dpagemap->drm;
> > > > +
> > > > + lockdep_assert_held(&cache->lookup_mutex);
> > > > + spin_lock(&cache->lock);
> > > > + dpagemap->cache = cache;
> > > > + swap(cache->dpagemap, dpagemap);
> > > > + reinit_completion(&cache->queued);
> > > > + spin_unlock(&cache->lock);
> > > > + drm_WARN_ON(drm, !!dpagemap);
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> > > > +
> > > > +/**
> > > > + * drm_pagemap_get_from_cache_if_active() - Quick lookup of
> > > > active
> > > > drm_pagemaps
> > > > + * @cache: The cache to lookup from.
> > > > + *
> > > > + * Function that should be used to lookup a drm_pagemap that is
> > > > already active.
> > > > + * (refcount > 0).
> > > > + *
> > > > + * Return: A pointer to the cache's drm_pagemap if it's active;
> > > > %NULL otherwise.
> > > > + */
> > > > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > > > drm_pagemap_cache *cache)
> > > > +{
> > > > + struct drm_pagemap *dpagemap;
> > > > +
> > > > + spin_lock(&cache->lock);
> > > > + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> > > > + spin_unlock(&cache->lock);
> > > > +
> > > > + return dpagemap;
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> > > > +
> > > > +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap
> > > > *dpagemap)
> > > > +{
> > > > + struct drm_pagemap_cache *cache = dpagemap->cache;
> > > > + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> > > > +
> > > > + spin_lock(&shrinker->lock);
> > > > + if (list_empty(&dpagemap->shrink_link)) {
> > > > + spin_unlock(&shrinker->lock);
> > > > + return false;
> > > > + }
> > > > +
> > > > + list_del_init(&dpagemap->shrink_link);
> > > > + atomic_dec(&shrinker->num_dpagemaps);
> > > > + spin_unlock(&shrinker->lock);
> > > > + return true;
> > > > +}
> > > > +
> > > > +/**
> > > > + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the
> > > > shrinker
> > > > list or destroy
> > > > + * @dpagemap: The drm_pagemap.
> > > > + *
> > > > + * If @dpagemap is associated with a &struct drm_pagemap_cache
> > > > AND
> > > > the
> > > > + * struct device backing the drm device is still alive, add
> > > > @dpagemap to
> > > > + * the &struct drm_pagemap_shrinker list of shrinkable
> > > > drm_pagemaps.
> > > > + *
> > > > + * Otherwise destroy the pagemap directly using
> > > > drm_pagemap_destroy().
> > > > + *
> > > > + * This is an internal function which is not intended to be
> > > > exposed to drivers.
> > > > + */
> > > > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
> > >
> > > Not a full review - slowly wrapping my head around the first 6
> > > patches
> > > but one quick question.
> > >
> > > This is called from drm_pagemap_put. How do we know what type of
> > > context
> > > we're in? It seems like this could be called from either process
> > > context
> > > or atomic context (e.g., via drm_pagemap_zdd_destroy through
> > > drm_pagemap_page_free). This code doesn’t appear to work in atomic
> > > contexts—if I recall correctly, drm_dev_enter can’t be called from
> > > atomic context. Also, we're missing irqsave on the spinlock.
> >
> > From reading up on srcu_read_lock(), which is hiding behind
> > drm_dev_enter(), it should be OK to call from atomic context as long
> > as
Yes, that should be fine then.
> > it is also released from the same context. I indeed checked that we
> > could call it under a spinlock without getting any lockdep warnings.
> >
> > The irqsave on the spinlock is a different thing, though. Do we know
> > that drm_pagemap_page_free() will be called from irq context?
IIRC it is a weird corner case where the final put can be called from
an atomic context, but perhaps I am misremembering here. Maybe it was
that the final put was from reclaim, and that's why we needed a worker
to be able to put the BO, which takes a dma-resv lock?
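Roughly the shape of what I'm remembering, with made-up names, just to
illustrate punting the dma-resv-taking put to process context:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <drm/drm_gem.h>

struct my_zdd {
        struct kref refcount;
        struct work_struct destroy_work;   /* INIT_WORK() at creation */
        struct drm_gem_object *bo;         /* backing device memory */
};

static void my_zdd_destroy_work_func(struct work_struct *w)
{
        struct my_zdd *zdd = container_of(w, typeof(*zdd), destroy_work);

        /* Process context: the final BO put may take dma-resv, fine here. */
        drm_gem_object_put(zdd->bo);
        kfree(zdd);
}

static void my_zdd_release(struct kref *ref)
{
        struct my_zdd *zdd = container_of(ref, typeof(*zdd), refcount);

        /* Callable from atomic/reclaim context: just queue the work. */
        queue_work(system_wq, &zdd->destroy_work);
}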
>
> Looks like the dmirror_devmem_free()
> https://elixir.bootlin.com/linux/v6.18-rc3/source/lib/test_hmm.c#L1377
>
> uses a spinlock without irqsave.
>
> That said, I can add a drm_pagemap_shrinker_might_lock() to our
> page_free() callback for CONFIG_PROVE_LOCKING.
>
Again, maybe I am misremembering here, but I agree some lockdep magic
would be good to ensure our design is correct.
Matt
> /Thomas
>
>
> >
> > /Thomas
> >
> >
> >
> > >
> > > We had a worker for ZDD destroy at one point—should we revive that?
> > > If
> > > we did, I think we could safely enforce a rule that drm_pagemap
> > > operations must only be called from process context.
> > >
> > > Matt
> > >
> > > > +{
> > > > + struct drm_pagemap_cache *cache;
> > > > + struct drm_pagemap_shrinker *shrinker;
> > > > + int idx;
> > > > +
> > > > + /*
> > > > + * The pagemap cache and shrinker are disabled at
> > > > + * pci device remove time. After that, dpagemaps
> > > > + * are freed directly.
> > > > + */
> > > > + if (!drm_dev_enter(dpagemap->drm, &idx))
> > > > + goto out_no_cache;
> > > > +
> > > > + cache = dpagemap->cache;
> > > > + if (!cache) {
> > > > + drm_dev_exit(idx);
> > > > + goto out_no_cache;
> > > > + }
> > > > +
> > > > + shrinker = cache->shrinker;
> > > > + spin_lock(&shrinker->lock);
> > > > + list_add_tail(&dpagemap->shrink_link, &shrinker-
> > > > > dpagemaps);
> > > > + atomic_inc(&shrinker->num_dpagemaps);
> > > > + spin_unlock(&shrinker->lock);
> > > > + complete_all(&cache->queued);
> > > > + drm_dev_exit(idx);
> > > > + return;
> > > > +
> > > > +out_no_cache:
> > > > + drm_pagemap_destroy(dpagemap, true);
> > > > +}
> > > > +
> > > > +static unsigned long
> > > > +drm_pagemap_shrinker_count(struct shrinker *shrink, struct
> > > > shrink_control *sc)
> > > > +{
> > > > + struct drm_pagemap_shrinker *shrinker = shrink-
> > > > > private_data;
> > > > + unsigned long count = atomic_read(&shrinker-
> > > > > num_dpagemaps);
> > > > +
> > > > + return count ? : SHRINK_EMPTY;
> > > > +}
> > > > +
> > > > +static unsigned long
> > > > +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct
> > > > shrink_control *sc)
> > > > +{
> > > > + struct drm_pagemap_shrinker *shrinker = shrink-
> > > > > private_data;
> > > > + struct drm_pagemap *dpagemap;
> > > > + struct drm_pagemap_cache *cache;
> > > > + unsigned long nr_freed = 0;
> > > > +
> > > > + sc->nr_scanned = 0;
> > > > + spin_lock(&shrinker->lock);
> > > > + do {
> > > > + dpagemap = list_first_entry_or_null(&shrinker-
> > > > > dpagemaps, typeof(*dpagemap),
> > > > +
> > > > shrink_link);
> > > > + if (!dpagemap)
> > > > + break;
> > > > +
> > > > + atomic_dec(&shrinker->num_dpagemaps);
> > > > + list_del_init(&dpagemap->shrink_link);
> > > > + spin_unlock(&shrinker->lock);
> > > > +
> > > > + sc->nr_scanned++;
> > > > + nr_freed++;
> > > > +
> > > > + cache = dpagemap->cache;
> > > > + spin_lock(&cache->lock);
> > > > + cache->dpagemap = NULL;
> > > > + spin_unlock(&cache->lock);
> > > > +
> > > > + drm_dbg(dpagemap->drm, "Shrinking dpagemap
> > > > %p.\n",
> > > > dpagemap);
> > > > + drm_pagemap_destroy(dpagemap, true);
> > > > + spin_lock(&shrinker->lock);
> > > > + } while (sc->nr_scanned < sc->nr_to_scan);
> > > > + spin_unlock(&shrinker->lock);
> > > > +
> > > > + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> > > > +}
> > > > +
> > > > +static void drm_pagemap_shrinker_fini(void *arg)
> > > > +{
> > > > + struct drm_pagemap_shrinker *shrinker = arg;
> > > > +
> > > > + drm_dbg(shrinker->drm, "Destroying dpagemap
> > > > shrinker.\n");
> > > > + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker-
> > > > > num_dpagemaps));
> > > > + shrinker_free(shrinker->shrink);
> > > > + kfree(shrinker);
> > > > +}
> > > > +
> > > > +/**
> > > > + * drm_pagemap_shrinker_create_devm() - Create and register a
> > > > pagemap shrinker
> > > > + * @drm: The drm device
> > > > + *
> > > > + * Create and register a pagemap shrinker that shrinks unused
> > > > pagemaps
> > > > + * and thereby reduces memory footprint.
> > > > + * The shrinker is drm_device managed and unregisters itself
> > > > when
> > > > + * the drm device is removed.
> > > > + *
> > > > + * Return: %0 on success, negative error code on failure.
> > > > + */
> > > > +struct drm_pagemap_shrinker
> > > > *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> > > > +{
> > > > + struct drm_pagemap_shrinker *shrinker;
> > > > + struct shrinker *shrink;
> > > > + int err;
> > > > +
> > > > + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> > > > + if (!shrinker)
> > > > + return ERR_PTR(-ENOMEM);
> > > > +
> > > > + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm-
> > > > > unique);
> > > > + if (!shrink) {
> > > > + kfree(shrinker);
> > > > + return ERR_PTR(-ENOMEM);
> > > > + }
> > > > +
> > > > + spin_lock_init(&shrinker->lock);
> > > > + INIT_LIST_HEAD(&shrinker->dpagemaps);
> > > > + shrinker->drm = drm;
> > > > + shrinker->shrink = shrink;
> > > > + shrink->count_objects = drm_pagemap_shrinker_count;
> > > > + shrink->scan_objects = drm_pagemap_shrinker_scan;
> > > > + shrink->private_data = shrinker;
> > > > + shrinker_register(shrink);
> > > > +
> > > > + err = devm_add_action_or_reset(drm->dev,
> > > > drm_pagemap_shrinker_fini, shrinker);
> > > > + if (err)
> > > > + return ERR_PTR(err);
> > > > +
> > > > + return shrinker;
> > > > +}
> > > > +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> > > > diff --git a/include/drm/drm_pagemap.h
> > > > b/include/drm/drm_pagemap.h
> > > > index 5cfe54331ba7..4b9af5e785c6 100644
> > > > --- a/include/drm/drm_pagemap.h
> > > > +++ b/include/drm/drm_pagemap.h
> > > > @@ -9,6 +9,7 @@
> > > > #define NR_PAGES(order) (1U << (order))
> > > >
> > > > struct drm_pagemap;
> > > > +struct drm_pagemap_cache;
> > > > struct drm_pagemap_dev_hold;
> > > > struct drm_pagemap_zdd;
> > > > struct device;
> > > > @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> > > > unsigned long start, unsigned long
> > > > end,
> > > > struct mm_struct *mm,
> > > > unsigned long timeslice_ms);
> > > > + /**
> > > > + * @destroy: Destroy the drm_pagemap and associated
> > > > resources.
> > > > + * @dpagemap: The drm_pagemap to destroy.
> > > > + * @is_atomic_or_reclaim: The function may be called
> > > > from
> > > > + * atomic- or reclaim context.
> > > > + *
> > > > + * The implementation should take care not to attempt to
> > > > + * destroy resources that may already have been
> > > > destroyed
> > > > + * using devm_ callbacks, since this function may be
> > > > called
> > > > + * after the underlying struct device has been unbound.
> > > > + * If the implementation defers the execution to a work
> > > > item
> > > > + * to avoid locking issues, then it must make sure the
> > > > work
> > > > + * items are flushed before module exit. If the destroy
> > > > call
> > > > + * happens after the provider's pci_remove() callback
> > > > has
> > > > + * been executed, a module reference and drm device
> > > > reference is
> > > > + * held across the destroy callback.
> > > > + */
> > > > + void (*destroy)(struct drm_pagemap *dpagemap,
> > > > + bool is_atomic_or_reclaim);
> > > > };
> > > >
> > > > /**
> > > > @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> > > > * @pagemap: Pointer to the underlying dev_pagemap.
> > > > * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> > > > * device referencing.
> > > > + * @cache: Back-pointer to the &struct drm_pagemap_cache used
> > > > for
> > > > this
> > > > + * &struct drm_pagemap. May be NULL if no cache is used.
> > > > + * @shrink_link: Link into the shrinker's list of drm_pagemaps.
> > > > Only
> > > > + * used if also using a pagemap cache.
> > > > */
> > > > struct drm_pagemap {
> > > > const struct drm_pagemap_ops *ops;
> > > > @@ -142,6 +166,8 @@ struct drm_pagemap {
> > > > struct drm_device *drm;
> > > > struct dev_pagemap *pagemap;
> > > > struct drm_pagemap_dev_hold *dev_hold;
> > > > + struct drm_pagemap_cache *cache;
> > > > + struct list_head shrink_link;
> > > > };
> > > >
> > > > struct drm_pagemap_devmem;
> > > > @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> > > > unsigned long npages);
> > > > };
> > > >
> > > > +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> > > > + struct dev_pagemap *pagemap,
> > > > + struct drm_device *drm,
> > > > + const struct drm_pagemap_ops *ops);
> > > > +
> > > > struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> > > > struct dev_pagemap
> > > > *pagemap,
> > > > const struct
> > > > drm_pagemap_ops *ops);
> > > > @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct
> > > > drm_pagemap *dpagemap)
> > > >
> > > > /**
> > > > * drm_pagemap_get() - Obtain a reference on a struct
> > > > drm_pagemap
> > > > - * @dpagemap: Pointer to the struct drm_pagemap.
> > > > + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> > > > *
> > > > - * Return: Pointer to the struct drm_pagemap.
> > > > + * Return: Pointer to the struct drm_pagemap, or NULL.
> > > > */
> > > > static inline struct drm_pagemap *
> > > > drm_pagemap_get(struct drm_pagemap *dpagemap)
> > > > @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap
> > > > *dpagemap)
> > > > return dpagemap;
> > > > }
> > > >
> > > > +/**
> > > > + * drm_pagemap_get_unless_zero() - Obtain a reference on a
> > > > struct
> > > > drm_pagemap
> > > > + * unless the current reference count is zero.
> > > > + * @dpagemap: Pointer to the drm_pagemap or NULL.
> > > > + *
> > > > + * Return: A pointer to @dpagemap if the reference count was
> > > > successfully
> > > > + * incremented. NULL if @dpagemap was NULL, or its refcount was
> > > > 0.
> > > > + */
> > > > +static inline struct drm_pagemap * __must_check
> > > > +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> > > > +{
> > > > + return (dpagemap && kref_get_unless_zero(&dpagemap-
> > > > >ref))
> > > > ? dpagemap : NULL;
> > > > +}
> > > > +
> > > > /**
> > > > * struct drm_pagemap_devmem - Structure representing a GPU SVM
> > > > device memory allocation
> > > > *
> > > > @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct
> > > > drm_pagemap
> > > > *dpagemap,
> > > > struct mm_struct *mm,
> > > > unsigned long timeslice_ms);
> > > >
> > > > -#endif
> > > > +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool
> > > > is_atomic_or_reclaim);
> > > >
> > > > +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> > > > +#endif
> > > > diff --git a/include/drm/drm_pagemap_util.h
> > > > b/include/drm/drm_pagemap_util.h
> > > > new file mode 100644
> > > > index 000000000000..292244d429ee
> > > > --- /dev/null
> > > > +++ b/include/drm/drm_pagemap_util.h
> > > > @@ -0,0 +1,25 @@
> > > > +/* SPDX-License-Identifier: MIT */
> > > > +#ifndef _DRM_PAGEMAP_UTIL_H_
> > > > +#define _DRM_PAGEMAP_UTIL_H_
> > > > +
> > > > +struct drm_device;
> > > > +struct drm_pagemap;
> > > > +struct drm_pagemap_cache;
> > > > +struct drm_pagemap_shrinker;
> > > > +
> > > > +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> > > > +
> > > > +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > > > *cache);
> > > > +
> > > > +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache
> > > > *cache);
> > > > +
> > > > +struct drm_pagemap_shrinker
> > > > *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> > > > +
> > > > +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct
> > > > drm_pagemap_shrinker *shrinker);
> > > > +
> > > > +struct drm_pagemap *drm_pagemap_get_from_cache(struct
> > > > drm_pagemap_cache *cache);
> > > > +
> > > > +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > > > *cache, struct drm_pagemap *dpagemap);
> > > > +
> > > > +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > > > drm_pagemap_cache *cache);
> > > > +#endif
> > > > --
> > > > 2.51.0
> > > >
> >
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
2025-10-28 1:23 ` Matthew Brost
@ 2025-10-29 22:41 ` Matthew Brost
2025-10-29 22:48 ` Matthew Brost
2 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 22:41 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> Pagemaps are costly to set up and tear down, and they consume a lot
> of system memory for the struct pages. Ideally they should be
> created only when needed.
>
> Add a caching mechanism to allow doing just that: Create the drm_pagemaps
> when needed for migration. Keep them around to avoid destruction and
> re-creation latencies and destroy inactive/unused drm_pagemaps on memory
> pressure using a shrinker.
>
> Only add the helper functions. They will be hooked up to the xe driver
> in the upcoming patch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/Makefile | 3 +-
> drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> drivers/gpu/drm/drm_pagemap_util.c | 426 +++++++++++++++++++++++++++++
> include/drm/drm_pagemap.h | 53 +++-
> include/drm/drm_pagemap_util.h | 25 ++
> 5 files changed, 569 insertions(+), 17 deletions(-)
> create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> create mode 100644 include/drm/drm_pagemap_util.h
>
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index c2672f369aed..cdca68fd9f23 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
>
> drm_gpusvm_helper-y := \
> drm_gpusvm.o\
> - drm_pagemap.o
> + drm_pagemap.o\
> + drm_pagemap_util.o
> obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
>
> obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index fb18a80d6a1c..5ca5b2b53bc1 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -8,6 +8,7 @@
> #include <linux/pagemap.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> #include <drm/drm_print.h>
>
> /**
> @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref *ref)
> * pagemap provider drm_device and its module.
> */
> dpagemap->dev_hold = NULL;
> - kfree(dpagemap);
> + drm_pagemap_shrinker_add(dpagemap);
> llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> schedule_work(&drm_pagemap_work);
> /*
> @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> return dev_hold;
> }
>
> +/**
> + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> + * @dpagemap: The drm_pagemap to reinitialize
> + *
> + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> + * has already been called. This interface is intended for the
> + * situation where the driver caches a destroyed drm_pagemap.
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> +{
> + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> + if (IS_ERR(dpagemap->dev_hold))
> + return PTR_ERR(dpagemap->dev_hold);
> +
> + kref_init(&dpagemap->ref);
> + return 0;
> +}
> +EXPORT_SYMBOL(drm_pagemap_reinit);
> +
> +/**
> + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> + * @dpagemap: The drm_pagemap to initialize.
> + * @pagemap: The associated dev_pagemap providing the device
> + * private pages.
> + * @drm: The drm device. The drm_pagemap holds a reference on the
> + * drm_device and the module owning the drm_device until
> + * drm_pagemap_release(). This facilitates drm_pagemap exporting.
> + * @ops: The drm_pagemap ops.
> + *
> + * Initialize and take an initial reference on a drm_pagemap.
> + * After successful return, use drm_pagemap_put() to destroy.
> + *
> + ** Return: 0 on success, negative error code on error.
> + */
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops)
> +{
Should we be paranoid and assert that dpagemap->ref is zero?
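e.g. something like this at the top (just a sketch, using kref_read()
to peek at the count):

        drm_WARN_ON(drm, kref_read(&dpagemap->ref) != 0);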
> + kref_init(&dpagemap->ref);
> + dpagemap->ops = ops;
> + dpagemap->pagemap = pagemap;
> + dpagemap->drm = drm;
> + dpagemap->cache = NULL;
> + INIT_LIST_HEAD(&dpagemap->shrink_link);
> +
> + return drm_pagemap_reinit(dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_init);
> +
> /**
> * drm_pagemap_create() - Create a struct drm_pagemap.
> * @drm: Pointer to a struct drm_device providing the device-private memory.
> @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> const struct drm_pagemap_ops *ops)
> {
> struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> - struct drm_pagemap_dev_hold *dev_hold;
> + int err;
>
> if (!dpagemap)
> return ERR_PTR(-ENOMEM);
>
> - kref_init(&dpagemap->ref);
> - dpagemap->drm = drm;
> - dpagemap->ops = ops;
> - dpagemap->pagemap = pagemap;
> -
> - dev_hold = drm_pagemap_dev_hold(dpagemap);
> - if (IS_ERR(dev_hold)) {
> - kfree(dpagemap);
> - return ERR_CAST(dev_hold);
> - }
> - dpagemap->dev_hold = dev_hold;
> + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> + if (err)
> + return ERR_PTR(err);
>
> return dpagemap;
> }
> @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> }
> EXPORT_SYMBOL(drm_pagemap_populate_mm);
>
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
> +{
Likewise here, assert ref count is zero?
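Same sketch as above, roughly:

	drm_WARN_ON(dpagemap->drm, kref_read(&dpagemap->ref));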
> + if (dpagemap->ops->destroy)
> + dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
> + else
> + kfree(dpagemap);
> +}
> +
> static void drm_pagemap_exit(void)
> {
> flush_work(&drm_pagemap_work);
> diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
> new file mode 100644
> index 000000000000..e1a1d6bf25f4
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_pagemap_util.c
> @@ -0,0 +1,426 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> +#include <drm/drm_print.h>
> +
> +/**
> + * struct drm_pagemap_cache - Lookup structure for pagemaps
> + *
> + * Structure to keep track of active (refcount > 1) and inactive
> + * (refcount == 0) pagemaps. Inactive pagemaps can be made active
> + * again by waiting for the @queued completion (indicating that the
> + * pagemap has been put on the @shrinker's list of shrinkable
> + * pagemaps, and then successfully removing it from @shrinker's
> + * list. The latter may fail if the shrinker is already in the
> + * process of freeing the pagemap. A struct drm_pagemap_cache can
> + * hold a single struct drm_pagemap.
> + */
> +struct drm_pagemap_cache {
> + /** @lookup_mutex: Mutex making the lookup process atomic */
> + struct mutex lookup_mutex;
> + /** @lock: Lock protecting the @dpagemap pointer */
> + spinlock_t lock;
> + /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */
> + struct drm_pagemap_shrinker *shrinker;
> + /** @dpagemap: Non-refcounted pointer to the drm_pagemap */
> + struct drm_pagemap *dpagemap;
> + /**
> + * @queued: Signals when an inactive drm_pagemap has been put on
> + * @shrinker's list.
> + */
> + struct completion queued;
> +};
> +
> +/**
> + * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps
> + */
> +struct drm_pagemap_shrinker {
> + /** @drm: Pointer to the drm device. */
> + struct drm_device *drm;
> + /** @lock: Spinlock to protect the @dpagemaps list. */
> + spinlock_t lock;
> + /** @dpagemaps: List of unused dpagemaps. */
> + struct list_head dpagemaps;
> + /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */
> + atomic_t num_dpagemaps;
> + /** @shrink: Pointer to the struct shrinker. */
> + struct shrinker *shrink;
> +};
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap);
> +
> +static void drm_pagemap_cache_fini(void *arg)
> +{
> + struct drm_pagemap_cache *cache = arg;
> + struct drm_pagemap *dpagemap;
> +
> + drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n");
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + goto out;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + drm_pagemap_destroy(dpagemap, false);
> + }
> +
> +out:
> + mutex_destroy(&cache->lookup_mutex);
> + kfree(cache);
> +}
> +
> +/**
> + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> + *
> + * Create a device-managed drm_pagemap cache. The cache is automatically
> + * destroyed on struct device removal, at which point any *inactive*
> + * drm_pagemap's are destroyed.
> + *
> + * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer
> + * on failure.
> + */
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker)
> +{
> + struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
> + int err;
> +
> + if (!cache)
> + return ERR_PTR(-ENOMEM);
> +
> + mutex_init(&cache->lookup_mutex);
devm_mutex_init()?
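i.e. something like this (sketch only, assuming devm_mutex_init() is
appropriate against shrinker->drm->dev here):

	err = devm_mutex_init(shrinker->drm->dev, &cache->lookup_mutex);
	if (err) {
		kfree(cache);
		return ERR_PTR(err);
	}

The mutex_destroy() in drm_pagemap_cache_fini() would then need a
second look, given the reverse ordering of devm teardown vs. the
kfree() there.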
> + spin_lock_init(&cache->lock);
> + cache->shrinker = shrinker;
> + init_completion(&cache->queued);
> + err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache);
> + if (err)
> + return ERR_PTR(err);
> +
> + return cache;
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> +
> +/**
> + * DOC: Cache lookup
> + *
> + * Cache lookup should be done under a locked mutex, so that a
> + * failed drm_pagemap_get_from_cache() and a following
> + * drm_pagemap_cache_setpagemap() are carried out as an atomic
> + * operation WRT other lookups. Otherwise, racing lookups may
> + * unnecessarily concurrently create pagemaps to fulfill a
> + * failed lookup. The API provides two functions to perform this lock,
> + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and they
> + * should be used in the following way:
> + *
> + * .. code-block:: c
> + *
> + * drm_pagemap_lock_lookup(cache);
s/drm_pagemap_lock_lookup/drm_pagemap_cache_lock_lookup
> + * dpagemap = drm_pagemap_get_from_cache(cache);
> + * if (dpagemap)
> + * goto out_unlock;
> + *
> + * dpagemap = driver_create_new_dpagemap();
> + * if (!IS_ERR(dpagemap))
> + * drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + *
> + * out_unlock:
> + * drm_pagemap_unlock_lookup(cache);
> + */
> +
> +/**
> + * drm_pagemap_cache_lock_lookup() Lock a drm_pagemap_cache for lookup
> + * @cache: The drm_pagemap_cache to lock.
> + *
> + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> + */
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
> +{
> + return mutex_lock_interruptible(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> +
> +/**
> + * drm_pagemap_cache_unlock_lookup() Unlock a drm_pagemap_cache after lookup
> + * @cache: The drm_pagemap_cache to unlock.
> + */
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache)
> +{
> + mutex_unlock(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> +
> +/**
> + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
Should we normalize all cache functions to use a 'drm_pagemap_cache' prefix?
So maybe drm_pagemap_cache_lookup_get()?
> + * @cache: The cache used for lookup.
> + *
> + * If an active pagemap is present in the cache, it is immediately returned.
> + * If an inactive pagemap is present, it's removed from the shrinker list and
> + * an attempt is made to make it active.
> + * If no pagemap present or the attempt to make it active failed, %NULL is returned
> + * to indicate to the caller to create a new drm_pagemap and insert it into
> + * the cache.
> + *
> + * Return: A reference-counted pointer to a drm_pagemap if successful. An error
> + * pointer if an error occurred, or %NULL if no drm_pagemap was found and
> + * the caller should insert a new one.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache)
> +{
> + struct drm_pagemap *dpagemap;
> + int err;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> +retry:
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (drm_pagemap_get_unless_zero(dpagemap)) {
> + spin_unlock(&cache->lock);
> + return dpagemap;
> + }
> +
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + return NULL;
> + }
> +
> + if (!try_wait_for_completion(&cache->queued)) {
> + spin_unlock(&cache->lock);
> + err = wait_for_completion_interruptible(&cache->queued);
> + if (err)
> + return ERR_PTR(err);
> + goto retry;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + err = drm_pagemap_reinit(dpagemap);
> + if (err) {
> + drm_pagemap_destroy(dpagemap, false);
> + return ERR_PTR(err);
> + }
> + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + } else {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + dpagemap = NULL;
> + }
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> +
> +/**
> + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache
> + * @cache: The cache to assign the drm_pagemap to.
> + * @dpagemap: The drm_pagemap to assign.
> + *
> + * The function must be called to populate a drm_pagemap_cache only
> + * after a call to drm_pagemap_get_from_cache() returns NULL.
> + */
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap)
> +{
> + struct drm_device *drm = dpagemap->drm;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> + spin_lock(&cache->lock);
> + dpagemap->cache = cache;
> + swap(cache->dpagemap, dpagemap);
> + reinit_completion(&cache->queued);
> + spin_unlock(&cache->lock);
> + drm_WARN_ON(drm, !!dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> +
> +/**
> + * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps
> + * @cache: The cache to lookup from.
> + *
> + * Function that should be used to lookup a drm_pagemap that is already active.
> + * (refcount > 0).
> + *
> + * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache)
Then this one would be drm_pagemap_cache_lookup_if_active()?
> +{
> + struct drm_pagemap *dpagemap;
> +
> + spin_lock(&cache->lock);
> + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> + spin_unlock(&cache->lock);
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_cache *cache = dpagemap->cache;
> + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> +
> + spin_lock(&shrinker->lock);
> + if (list_empty(&dpagemap->shrink_link)) {
> + spin_unlock(&shrinker->lock);
> + return false;
> + }
> +
> + list_del_init(&dpagemap->shrink_link);
> + atomic_dec(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + return true;
> +}
> +
> +/**
> + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy
> + * @dpagemap: The drm_pagemap.
> + *
> + * If @dpagemap is associated with a &struct drm_pagemap_cache AND the
> + * struct device backing the drm device is still alive, add @dpagemap to
> + * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps.
> + *
> + * Otherwise destroy the pagemap directly using drm_pagemap_destroy().
> + *
> + * This is an internal function which is not intended to be exposed to drivers.
> + */
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_cache *cache;
> + struct drm_pagemap_shrinker *shrinker;
> + int idx;
> +
> + /*
> + * The pagemap cache and shrinker are disabled at
> + * pci device remove time. After that, dpagemaps
> + * are freed directly.
> + */
> + if (!drm_dev_enter(dpagemap->drm, &idx))
> + goto out_no_cache;
> +
> + cache = dpagemap->cache;
> + if (!cache) {
> + drm_dev_exit(idx);
> + goto out_no_cache;
> + }
Can you explain the above logic? I'm not really following it.
> +
> + shrinker = cache->shrinker;
> + spin_lock(&shrinker->lock);
> + list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
> + atomic_inc(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + complete_all(&cache->queued);
> + drm_dev_exit(idx);
> + return;
> +
> +out_no_cache:
> + drm_pagemap_destroy(dpagemap, true);
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + unsigned long count = atomic_read(&shrinker->num_dpagemaps);
> +
> + return count ? : SHRINK_EMPTY;
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + struct drm_pagemap *dpagemap;
> + struct drm_pagemap_cache *cache;
> + unsigned long nr_freed = 0;
> +
> + sc->nr_scanned = 0;
> + spin_lock(&shrinker->lock);
> + do {
> + dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap),
> + shrink_link);
> + if (!dpagemap)
> + break;
> +
> + atomic_dec(&shrinker->num_dpagemaps);
> + list_del_init(&dpagemap->shrink_link);
> + spin_unlock(&shrinker->lock);
> +
> + sc->nr_scanned++;
> + nr_freed++;
sc->nr_scanned and nr_freed will always be the same value, right? I
think nr_freed can be dropped then.
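i.e., just a sketch:

	return sc->nr_scanned ? sc->nr_scanned : SHRINK_STOP;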
> +
> + cache = dpagemap->cache;
> + spin_lock(&cache->lock);
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> +
> + drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap);
> + drm_pagemap_destroy(dpagemap, true);
> + spin_lock(&shrinker->lock);
> + } while (sc->nr_scanned < sc->nr_to_scan);
> + spin_unlock(&shrinker->lock);
> +
> + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> +}
> +
> +static void drm_pagemap_shrinker_fini(void *arg)
> +{
> + struct drm_pagemap_shrinker *shrinker = arg;
> +
> + drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
> + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps));
> + shrinker_free(shrinker->shrink);
> + kfree(shrinker);
> +}
> +
> +/**
> + * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker
> + * @drm: The drm device
> + *
> + * Create and register a pagemap shrinker that shrinks unused pagemaps
> + * and thereby reduces memory footprint.
> + * The shrinker is drm_device managed and unregisters itself when
> + * the drm device is removed.
> + *
> + * Return: %0 on success, negative error code on failure.
> + */
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> +{
> + struct drm_pagemap_shrinker *shrinker;
> + struct shrinker *shrink;
> + int err;
> +
> + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> + if (!shrinker)
> + return ERR_PTR(-ENOMEM);
> +
> + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique);
> + if (!shrink) {
> + kfree(shrinker);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + spin_lock_init(&shrinker->lock);
> + INIT_LIST_HEAD(&shrinker->dpagemaps);
> + shrinker->drm = drm;
> + shrinker->shrink = shrink;
> + shrink->count_objects = drm_pagemap_shrinker_count;
> + shrink->scan_objects = drm_pagemap_shrinker_scan;
> + shrink->private_data = shrinker;
> + shrinker_register(shrink);
> +
> + err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker);
> + if (err)
> + return ERR_PTR(err);
> +
> + return shrinker;
> +}
> +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index 5cfe54331ba7..4b9af5e785c6 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -9,6 +9,7 @@
> #define NR_PAGES(order) (1U << (order))
>
> struct drm_pagemap;
> +struct drm_pagemap_cache;
> struct drm_pagemap_dev_hold;
> struct drm_pagemap_zdd;
> struct device;
> @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> unsigned long start, unsigned long end,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
> + /**
> + * @destroy: Destroy the drm_pagemap and associated resources.
> + * @dpagemap: The drm_pagemap to destroy.
> + * @is_atomic_or_reclaim: The function may be called from
> + * atomic- or reclaim context.
> + *
> + * The implementation should take care not to attempt to
> + * destroy resources that may already have been destroyed
> + * using devm_ callbacks, since this function may be called
> + * after the underlying struct device has been unbound.
> + * If the implementation defers the execution to a work item
> + * to avoid locking issues, then it must make sure the work
> + * items are flushed before module exit. If the destroy call
> + * happens after the provider's pci_remove() callback has
> + * been executed, a module reference and drm device reference is
> + * held across the destroy callback.
> + */
> + void (*destroy)(struct drm_pagemap *dpagemap,
> + bool is_atomic_or_reclaim);
> };
>
> /**
> @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> * @pagemap: Pointer to the underlying dev_pagemap.
> * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> * device referencing.
> + * @cache: Back-pointer to the &struct drm_pagemap_cache used for this
> + * &struct drm_pagemap. May be NULL if no cache is used.
> + * @shrink_link: Link into the shrinker's list of drm_pagemaps. Only
> + * used if also using a pagemap cache.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> @@ -142,6 +166,8 @@ struct drm_pagemap {
> struct drm_device *drm;
> struct dev_pagemap *pagemap;
> struct drm_pagemap_dev_hold *dev_hold;
> + struct drm_pagemap_cache *cache;
> + struct list_head shrink_link;
> };
>
> struct drm_pagemap_devmem;
> @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops);
> +
> struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> struct dev_pagemap *pagemap,
> const struct drm_pagemap_ops *ops);
> @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
>
> /**
> * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> - * @dpagemap: Pointer to the struct drm_pagemap.
> + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> *
> - * Return: Pointer to the struct drm_pagemap.
> + * Return: Pointer to the struct drm_pagemap, or NULL.
> */
> static inline struct drm_pagemap *
> drm_pagemap_get(struct drm_pagemap *dpagemap)
> @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap *dpagemap)
> return dpagemap;
> }
>
> +/**
> + * drm_pagemap_get_unless_zero() - Obtain a reference on a struct drm_pagemap
> + * unless the current reference count is zero.
> + * @dpagemap: Pointer to the drm_pagemap or NULL.
> + *
> + * Return: A pointer to @dpagemap if the reference count was successfully
> + * incremented. NULL if @dpagemap was NULL, or its refcount was 0.
> + */
> +static inline struct drm_pagemap * __must_check
> +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> +{
> + return (dpagemap && kref_get_unless_zero(&dpagemap->ref)) ? dpagemap : NULL;
> +}
> +
> /**
> * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
> *
> @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
>
> -#endif
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim);
>
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> +#endif
> diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
> new file mode 100644
> index 000000000000..292244d429ee
> --- /dev/null
> +++ b/include/drm/drm_pagemap_util.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: MIT */
>
Do we need an Intel copyright here?
> +#ifndef _DRM_PAGEMAP_UTIL_H_
> +#define _DRM_PAGEMAP_UTIL_H_
> +
> +struct drm_device;
> +struct drm_pagemap;
> +struct drm_pagemap_cache;
> +struct drm_pagemap_shrinker;
> +
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> +
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache);
> +
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> +
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
I'd add an extra newline here.
Matt
> +#endif
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread

* Re: [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
2025-10-28 1:23 ` Matthew Brost
2025-10-29 22:41 ` Matthew Brost
@ 2025-10-29 22:48 ` Matthew Brost
2 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 22:48 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:01PM +0200, Thomas Hellström wrote:
> Pagemaps are costly to set up and tear down, and they consume a lot
> of system memory for the struct pages. Ideally they should be
> created only when needed.
>
> Add a caching mechanism to allow doing just that: Create the drm_pagemaps
> when needed for migration. Keep them around to avoid destruction and
> re-creation latencies and destroy inactive/unused drm_pagemaps on memory
> pressure using a shrinker.
>
> Only add the helper functions. They will be hooked up to the xe driver
> in the upcoming patch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/Makefile | 3 +-
> drivers/gpu/drm/drm_pagemap.c | 79 +++++-
> drivers/gpu/drm/drm_pagemap_util.c | 426 +++++++++++++++++++++++++++++
> include/drm/drm_pagemap.h | 53 +++-
> include/drm/drm_pagemap_util.h | 25 ++
> 5 files changed, 569 insertions(+), 17 deletions(-)
> create mode 100644 drivers/gpu/drm/drm_pagemap_util.c
> create mode 100644 include/drm/drm_pagemap_util.h
>
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index c2672f369aed..cdca68fd9f23 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -107,7 +107,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
>
> drm_gpusvm_helper-y := \
> drm_gpusvm.o\
> - drm_pagemap.o
> + drm_pagemap.o\
> + drm_pagemap_util.o
> obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
>
> obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index fb18a80d6a1c..5ca5b2b53bc1 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -8,6 +8,7 @@
> #include <linux/pagemap.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> #include <drm/drm_print.h>
>
> /**
> @@ -578,7 +579,7 @@ static void drm_pagemap_release(struct kref *ref)
> * pagemap provider drm_device and its module.
> */
> dpagemap->dev_hold = NULL;
> - kfree(dpagemap);
> + drm_pagemap_shrinker_add(dpagemap);
> llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
> schedule_work(&drm_pagemap_work);
> /*
> @@ -628,6 +629,58 @@ drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
> return dev_hold;
> }
>
> +/**
> + * drm_pagemap_reinit() - Reinitialize a drm_pagemap
> + * @dpagemap: The drm_pagemap to reinitialize
> + *
> + * Reinitialize a drm_pagemap, for which drm_pagemap_release
> + * has already been called. This interface is intended for the
> + * situation where the driver caches a destroyed drm_pagemap.
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
> +{
> + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
> + if (IS_ERR(dpagemap->dev_hold))
> + return PTR_ERR(dpagemap->dev_hold);
> +
> + kref_init(&dpagemap->ref);
> + return 0;
> +}
> +EXPORT_SYMBOL(drm_pagemap_reinit);
> +
> +/**
> + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
> + * @dpagemap: The drm_pagemap to initialize.
> + * @pagemap: The associated dev_pagemap providing the device
> + * private pages.
> + * @drm: The drm device. The drm_pagemap holds a reference on the
> + * drm_device and the module owning the drm_device until
> + * drm_pagemap_release(). This facilitates drm_pagemap exporting.
> + * @ops: The drm_pagemap ops.
> + *
> + * Initialize and take an initial reference on a drm_pagemap.
> + * After successful return, use drm_pagemap_put() to destroy.
> + *
> + * Return: 0 on success, negative error code on error.
> + */
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops)
> +{
> + kref_init(&dpagemap->ref);
The above kref_init() can be dropped, as drm_pagemap_reinit() already does this.
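i.e., a sketch of what I mean:

	int drm_pagemap_init(struct drm_pagemap *dpagemap,
			     struct dev_pagemap *pagemap,
			     struct drm_device *drm,
			     const struct drm_pagemap_ops *ops)
	{
		dpagemap->ops = ops;
		dpagemap->pagemap = pagemap;
		dpagemap->drm = drm;
		dpagemap->cache = NULL;
		INIT_LIST_HEAD(&dpagemap->shrink_link);

		return drm_pagemap_reinit(dpagemap);
	}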
Matt
> + dpagemap->ops = ops;
> + dpagemap->pagemap = pagemap;
> + dpagemap->drm = drm;
> + dpagemap->cache = NULL;
> + INIT_LIST_HEAD(&dpagemap->shrink_link);
> +
> + return drm_pagemap_reinit(dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_init);
> +
> /**
> * drm_pagemap_create() - Create a struct drm_pagemap.
> * @drm: Pointer to a struct drm_device providing the device-private memory.
> @@ -645,22 +698,14 @@ drm_pagemap_create(struct drm_device *drm,
> const struct drm_pagemap_ops *ops)
> {
> struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> - struct drm_pagemap_dev_hold *dev_hold;
> + int err;
>
> if (!dpagemap)
> return ERR_PTR(-ENOMEM);
>
> - kref_init(&dpagemap->ref);
> - dpagemap->drm = drm;
> - dpagemap->ops = ops;
> - dpagemap->pagemap = pagemap;
> -
> - dev_hold = drm_pagemap_dev_hold(dpagemap);
> - if (IS_ERR(dev_hold)) {
> - kfree(dpagemap);
> - return ERR_CAST(dev_hold);
> - }
> - dpagemap->dev_hold = dev_hold;
> + err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> + if (err)
> + return ERR_PTR(err);
>
> return dpagemap;
> }
> @@ -1023,6 +1068,14 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> }
> EXPORT_SYMBOL(drm_pagemap_populate_mm);
>
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
> +{
> + if (dpagemap->ops->destroy)
> + dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
> + else
> + kfree(dpagemap);
> +}
> +
> static void drm_pagemap_exit(void)
> {
> flush_work(&drm_pagemap_work);
> diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
> new file mode 100644
> index 000000000000..e1a1d6bf25f4
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_pagemap_util.c
> @@ -0,0 +1,426 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR MIT
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
> +#include <drm/drm_print.h>
> +
> +/**
> + * struct drm_pagemap_cache - Lookup structure for pagemaps
> + *
> + * Structure to keep track of active (refcount > 1) and inactive
> + * (refcount == 0) pagemaps. Inactive pagemaps can be made active
> + * again by waiting for the @queued completion (indicating that the
> + * pagemap has been put on the @shrinker's list of shrinkable
> + * pagemaps, and then successfully removing it from @shrinker's
> + * list. The latter may fail if the shrinker is already in the
> + * process of freeing the pagemap. A struct drm_pagemap_cache can
> + * hold a single struct drm_pagemap.
> + */
> +struct drm_pagemap_cache {
> + /** @lookup_mutex: Mutex making the lookup process atomic */
> + struct mutex lookup_mutex;
> + /** @lock: Lock protecting the @dpagemap pointer */
> + spinlock_t lock;
> + /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */
> + struct drm_pagemap_shrinker *shrinker;
> + /** @dpagemap: Non-refcounted pointer to the drm_pagemap */
> + struct drm_pagemap *dpagemap;
> + /**
> + * @queued: Signals when an inactive drm_pagemap has been put on
> + * @shrinker's list.
> + */
> + struct completion queued;
> +};
> +
> +/**
> + * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps
> + */
> +struct drm_pagemap_shrinker {
> + /** @drm: Pointer to the drm device. */
> + struct drm_device *drm;
> + /** @lock: Spinlock to protect the @dpagemaps list. */
> + spinlock_t lock;
> + /** @dpagemaps: List of unused dpagemaps. */
> + struct list_head dpagemaps;
> + /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */
> + atomic_t num_dpagemaps;
> + /** @shrink: Pointer to the struct shrinker. */
> + struct shrinker *shrink;
> +};
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap);
> +
> +static void drm_pagemap_cache_fini(void *arg)
> +{
> + struct drm_pagemap_cache *cache = arg;
> + struct drm_pagemap *dpagemap;
> +
> + drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n");
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + goto out;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + drm_pagemap_destroy(dpagemap, false);
> + }
> +
> +out:
> + mutex_destroy(&cache->lookup_mutex);
> + kfree(cache);
> +}
> +
> +/**
> + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
> + * @shrinker: Pointer to a struct drm_pagemap_shrinker.
> + *
> + * Create a device-managed drm_pagemap cache. The cache is automatically
> + * destroyed on struct device removal, at which point any *inactive*
> + * drm_pagemap's are destroyed.
> + *
> + * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer
> + * on failure.
> + */
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker)
> +{
> + struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
> + int err;
> +
> + if (!cache)
> + return ERR_PTR(-ENOMEM);
> +
> + mutex_init(&cache->lookup_mutex);
> + spin_lock_init(&cache->lock);
> + cache->shrinker = shrinker;
> + init_completion(&cache->queued);
> + err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache);
> + if (err)
> + return ERR_PTR(err);
> +
> + return cache;
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
> +
> +/**
> + * DOC: Cache lookup
> + *
> + * Cache lookup should be done under a locked mutex, so that a
> + * failed drm_pagemap_get_from_cache() and a following
> + * drm_pagemap_cache_setpagemap() are carried out as an atomic
> + * operation WRT other lookups. Otherwise, racing lookups may
> + * unnecessarily concurrently create pagemaps to fulfill a
> + * failed lookup. The API provides two functions to perform this lock,
> + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and they
> + * should be used in the following way:
> + *
> + * .. code-block:: c
> + *
> + * drm_pagemap_lock_lookup(cache);
> + * dpagemap = drm_pagemap_get_from_cache(cache);
> + * if (dpagemap)
> + * goto out_unlock;
> + *
> + * dpagemap = driver_create_new_dpagemap();
> + * if (!IS_ERR(dpagemap))
> + * drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + *
> + * out_unlock:
> + * drm_pagemap_unlock_lookup(cache);
> + */
> +
> +/**
> + * drm_pagemap_cache_lock_lookup() Lock a drm_pagemap_cache for lookup
> + * @cache: The drm_pagemap_cache to lock.
> + *
> + * Return: %-EINTR if interrupted while blocking. %0 otherwise.
> + */
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
> +{
> + return mutex_lock_interruptible(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
> +
> +/**
> + * drm_pagemap_cache_unlock_lookup() Unlock a drm_pagemap_cache after lookup
> + * @cache: The drm_pagemap_cache to unlock.
> + */
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache)
> +{
> + mutex_unlock(&cache->lookup_mutex);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
> +
> +/**
> + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
> + * @cache: The cache used for lookup.
> + *
> + * If an active pagemap is present in the cache, it is immediately returned.
> + * If an inactive pagemap is present, it's removed from the shrinker list and
> + * an attempt is made to make it active.
> + * If no pagemap present or the attempt to make it active failed, %NULL is returned
> + * to indicate to the caller to create a new drm_pagemap and insert it into
> + * the cache.
> + *
> + * Return: A reference-counted pointer to a drm_pagemap if successful. An error
> + * pointer if an error occurred, or %NULL if no drm_pagemap was found and
> + * the caller should insert a new one.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache)
> +{
> + struct drm_pagemap *dpagemap;
> + int err;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> +retry:
> + spin_lock(&cache->lock);
> + dpagemap = cache->dpagemap;
> + if (drm_pagemap_get_unless_zero(dpagemap)) {
> + spin_unlock(&cache->lock);
> + return dpagemap;
> + }
> +
> + if (!dpagemap) {
> + spin_unlock(&cache->lock);
> + return NULL;
> + }
> +
> + if (!try_wait_for_completion(&cache->queued)) {
> + spin_unlock(&cache->lock);
> + err = wait_for_completion_interruptible(&cache->queued);
> + if (err)
> + return ERR_PTR(err);
> + goto retry;
> + }
> +
> + if (drm_pagemap_shrinker_cancel(dpagemap)) {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + err = drm_pagemap_reinit(dpagemap);
> + if (err) {
> + drm_pagemap_destroy(dpagemap, false);
> + return ERR_PTR(err);
> + }
> + drm_pagemap_cache_set_pagemap(cache, dpagemap);
> + } else {
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> + dpagemap = NULL;
> + }
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache);
> +
> +/**
> + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache
> + * @cache: The cache to assign the drm_pagemap to.
> + * @dpagemap: The drm_pagemap to assign.
> + *
> + * The function must be called to populate a drm_pagemap_cache only
> + * after a call to drm_pagemap_get_from_cache() returns NULL.
> + */
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap)
> +{
> + struct drm_device *drm = dpagemap->drm;
> +
> + lockdep_assert_held(&cache->lookup_mutex);
> + spin_lock(&cache->lock);
> + dpagemap->cache = cache;
> + swap(cache->dpagemap, dpagemap);
> + reinit_completion(&cache->queued);
> + spin_unlock(&cache->lock);
> + drm_WARN_ON(drm, !!dpagemap);
> +}
> +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
> +
> +/**
> + * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps
> + * @cache: The cache to lookup from.
> + *
> + * Function that should be used to lookup a drm_pagemap that is already active.
> + * (refcount > 0).
> + *
> + * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise.
> + */
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache)
> +{
> + struct drm_pagemap *dpagemap;
> +
> + spin_lock(&cache->lock);
> + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
> + spin_unlock(&cache->lock);
> +
> + return dpagemap;
> +}
> +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
> +
> +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_cache *cache = dpagemap->cache;
> + struct drm_pagemap_shrinker *shrinker = cache->shrinker;
> +
> + spin_lock(&shrinker->lock);
> + if (list_empty(&dpagemap->shrink_link)) {
> + spin_unlock(&shrinker->lock);
> + return false;
> + }
> +
> + list_del_init(&dpagemap->shrink_link);
> + atomic_dec(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + return true;
> +}
> +
> +/**
> + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy
> + * @dpagemap: The drm_pagemap.
> + *
> + * If @dpagemap is associated with a &struct drm_pagemap_cache AND the
> + * struct device backing the drm device is still alive, add @dpagemap to
> + * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps.
> + *
> + * Otherwise destroy the pagemap directly using drm_pagemap_destroy().
> + *
> + * This is an internal function which is not intended to be exposed to drivers.
> + */
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
> +{
> + struct drm_pagemap_cache *cache;
> + struct drm_pagemap_shrinker *shrinker;
> + int idx;
> +
> + /*
> + * The pagemap cache and shrinker are disabled at
> + * pci device remove time. After that, dpagemaps
> + * are freed directly.
> + */
> + if (!drm_dev_enter(dpagemap->drm, &idx))
> + goto out_no_cache;
> +
> + cache = dpagemap->cache;
> + if (!cache) {
> + drm_dev_exit(idx);
> + goto out_no_cache;
> + }
> +
> + shrinker = cache->shrinker;
> + spin_lock(&shrinker->lock);
> + list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
> + atomic_inc(&shrinker->num_dpagemaps);
> + spin_unlock(&shrinker->lock);
> + complete_all(&cache->queued);
> + drm_dev_exit(idx);
> + return;
> +
> +out_no_cache:
> + drm_pagemap_destroy(dpagemap, true);
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + unsigned long count = atomic_read(&shrinker->num_dpagemaps);
> +
> + return count ? : SHRINK_EMPTY;
> +}
> +
> +static unsigned long
> +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct drm_pagemap_shrinker *shrinker = shrink->private_data;
> + struct drm_pagemap *dpagemap;
> + struct drm_pagemap_cache *cache;
> + unsigned long nr_freed = 0;
> +
> + sc->nr_scanned = 0;
> + spin_lock(&shrinker->lock);
> + do {
> + dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap),
> + shrink_link);
> + if (!dpagemap)
> + break;
> +
> + atomic_dec(&shrinker->num_dpagemaps);
> + list_del_init(&dpagemap->shrink_link);
> + spin_unlock(&shrinker->lock);
> +
> + sc->nr_scanned++;
> + nr_freed++;
> +
> + cache = dpagemap->cache;
> + spin_lock(&cache->lock);
> + cache->dpagemap = NULL;
> + spin_unlock(&cache->lock);
> +
> + drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap);
> + drm_pagemap_destroy(dpagemap, true);
> + spin_lock(&shrinker->lock);
> + } while (sc->nr_scanned < sc->nr_to_scan);
> + spin_unlock(&shrinker->lock);
> +
> + return sc->nr_scanned ? nr_freed : SHRINK_STOP;
> +}
> +
> +static void drm_pagemap_shrinker_fini(void *arg)
> +{
> + struct drm_pagemap_shrinker *shrinker = arg;
> +
> + drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
> + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps));
> + shrinker_free(shrinker->shrink);
> + kfree(shrinker);
> +}
> +
> +/**
> + * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker
> + * @drm: The drm device
> + *
> + * Create and register a pagemap shrinker that shrinks unused pagemaps
> + * and thereby reduces memory footprint.
> + * The shrinker is drm_device managed and unregisters itself when
> + * the drm device is removed.
> + *
> + * Return: %0 on success, negative error code on failure.
> + */
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
> +{
> + struct drm_pagemap_shrinker *shrinker;
> + struct shrinker *shrink;
> + int err;
> +
> + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
> + if (!shrinker)
> + return ERR_PTR(-ENOMEM);
> +
> + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique);
> + if (!shrink) {
> + kfree(shrinker);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + spin_lock_init(&shrinker->lock);
> + INIT_LIST_HEAD(&shrinker->dpagemaps);
> + shrinker->drm = drm;
> + shrinker->shrink = shrink;
> + shrink->count_objects = drm_pagemap_shrinker_count;
> + shrink->scan_objects = drm_pagemap_shrinker_scan;
> + shrink->private_data = shrinker;
> + shrinker_register(shrink);
> +
> + err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker);
> + if (err)
> + return ERR_PTR(err);
> +
> + return shrinker;
> +}
> +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
> index 5cfe54331ba7..4b9af5e785c6 100644
> --- a/include/drm/drm_pagemap.h
> +++ b/include/drm/drm_pagemap.h
> @@ -9,6 +9,7 @@
> #define NR_PAGES(order) (1U << (order))
>
> struct drm_pagemap;
> +struct drm_pagemap_cache;
> struct drm_pagemap_dev_hold;
> struct drm_pagemap_zdd;
> struct device;
> @@ -124,6 +125,25 @@ struct drm_pagemap_ops {
> unsigned long start, unsigned long end,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
> + /**
> + * @destroy: Destroy the drm_pagemap and associated resources.
> + * @dpagemap: The drm_pagemap to destroy.
> + * @is_atomic_or_reclaim: The function may be called from
> + * atomic- or reclaim context.
> + *
> + * The implementation should take care not to attempt to
> + * destroy resources that may already have been destroyed
> + * using devm_ callbacks, since this function may be called
> + * after the underlying struct device has been unbound.
> + * If the implementation defers the execution to a work item
> + * to avoid locking issues, then it must make sure the work
> + * items are flushed before module exit. If the destroy call
> + * happens after the provider's pci_remove() callback has
> + * been executed, a module reference and drm device reference is
> + * held across the destroy callback.
> + */
> + void (*destroy)(struct drm_pagemap *dpagemap,
> + bool is_atomic_or_reclaim);
> };
>
> /**
> @@ -135,6 +155,10 @@ struct drm_pagemap_ops {
> * @pagemap: Pointer to the underlying dev_pagemap.
> * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
> * device referencing.
> + * @cache: Back-pointer to the &struct drm_pagemap_cache used for this
> + * &struct drm_pagemap. May be NULL if no cache is used.
> + * @shrink_link: Link into the shrinker's list of drm_pagemaps. Only
> + * used if also using a pagemap cache.
> */
> struct drm_pagemap {
> const struct drm_pagemap_ops *ops;
> @@ -142,6 +166,8 @@ struct drm_pagemap {
> struct drm_device *drm;
> struct dev_pagemap *pagemap;
> struct drm_pagemap_dev_hold *dev_hold;
> + struct drm_pagemap_cache *cache;
> + struct list_head shrink_link;
> };
>
> struct drm_pagemap_devmem;
> @@ -210,6 +236,11 @@ struct drm_pagemap_devmem_ops {
> unsigned long npages);
> };
>
> +int drm_pagemap_init(struct drm_pagemap *dpagemap,
> + struct dev_pagemap *pagemap,
> + struct drm_device *drm,
> + const struct drm_pagemap_ops *ops);
> +
> struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
> struct dev_pagemap *pagemap,
> const struct drm_pagemap_ops *ops);
> @@ -228,9 +259,9 @@ static inline void drm_pagemap_put(struct drm_pagemap *dpagemap)
>
> /**
> * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap
> - * @dpagemap: Pointer to the struct drm_pagemap.
> + * @dpagemap: Pointer to the struct drm_pagemap, or NULL.
> *
> - * Return: Pointer to the struct drm_pagemap.
> + * Return: Pointer to the struct drm_pagemap, or NULL.
> */
> static inline struct drm_pagemap *
> drm_pagemap_get(struct drm_pagemap *dpagemap)
> @@ -241,6 +272,20 @@ drm_pagemap_get(struct drm_pagemap *dpagemap)
> return dpagemap;
> }
>
> +/**
> + * drm_pagemap_get_unless_zero() - Obtain a reference on a struct drm_pagemap
> + * unless the current reference count is zero.
> + * @dpagemap: Pointer to the drm_pagemap or NULL.
> + *
> + * Return: A pointer to @dpagemap if the reference count was successfully
> + * incremented. NULL if @dpagemap was NULL, or its refcount was 0.
> + */
> +static inline struct drm_pagemap * __must_check
> +drm_pagemap_get_unless_zero(struct drm_pagemap *dpagemap)
> +{
> + return (dpagemap && kref_get_unless_zero(&dpagemap->ref)) ? dpagemap : NULL;
> +}
> +
> /**
> * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
> *
> @@ -284,5 +329,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms);
>
> -#endif
> +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim);
>
> +int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
> +#endif
> diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
> new file mode 100644
> index 000000000000..292244d429ee
> --- /dev/null
> +++ b/include/drm/drm_pagemap_util.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: MIT */
> +#ifndef _DRM_PAGEMAP_UTIL_H_
> +#define _DRM_PAGEMAP_UTIL_H_
> +
> +struct drm_device;
> +struct drm_pagemap;
> +struct drm_pagemap_cache;
> +struct drm_pagemap_shrinker;
> +
> +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> +
> +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache);
> +
> +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm);
> +
> +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
> +
> +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
> +#endif
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 05/15] drm/xe: Use the drm_pagemap cache and shrinker
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (3 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-30 0:43 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface Thomas Hellström
` (9 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Define a struct xe_pagemap that embeds all pagemap-related
data used by xekmd, and use the drm_pagemap cache and
shrinker to manage its lifetime.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 6 +
drivers/gpu/drm/xe/xe_device_types.h | 5 +
drivers/gpu/drm/xe/xe_svm.c | 354 +++++++++++++++++++++------
drivers/gpu/drm/xe/xe_svm.h | 38 ++-
drivers/gpu/drm/xe/xe_tile.c | 34 ++-
drivers/gpu/drm/xe/xe_tile.h | 21 ++
drivers/gpu/drm/xe/xe_vm_types.h | 1 +
drivers/gpu/drm/xe/xe_vram_types.h | 15 +-
8 files changed, 379 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5f6a412b571c..ad004aab67ce 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -16,6 +16,7 @@
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
+#include <drm/drm_pagemap_util.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>
@@ -62,6 +63,7 @@
#include "xe_shrinker.h"
#include "xe_survivability_mode.h"
#include "xe_sriov.h"
+#include "xe_svm.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
@@ -465,6 +467,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
init_rwsem(&xe->usm.lock);
+ err = xe_pagemap_shrinker_create(xe);
+ if (err)
+ goto err;
+
xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6a62b520f5b5..dbcfe54b36d8 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -35,6 +35,7 @@
#endif
struct dram_info;
+struct drm_pagemap_shrinker;
struct intel_display;
struct intel_dg_nvm_dev;
struct xe_ggtt;
@@ -418,6 +419,10 @@ struct xe_device {
u32 next_asid;
/** @usm.lock: protects UM state */
struct rw_semaphore lock;
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+ /** @usm.pagemap_shrinker: Shrinker for unused pagemaps */
+ struct drm_pagemap_shrinker *dpagemap_shrinker;
+#endif
} usm;
/** @pinned: pinned BO state */
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index f6ee22da2e95..8aced064c83f 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -4,6 +4,9 @@
*/
#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
#include "xe_bo.h"
#include "xe_exec_queue_types.h"
@@ -19,6 +22,8 @@
#include "xe_vm_types.h"
#include "xe_vram_types.h"
+static int xe_svm_get_pagemaps(struct xe_vm *vm);
+
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
/*
@@ -395,22 +400,34 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
-static struct xe_vram_region *page_to_vr(struct page *page)
+static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
{
- return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
+ return xpagemap->vr;
}
-static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
- struct page *page)
+static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
{
- u64 dpa;
+ return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
+}
+
+static struct xe_vram_region *xe_page_to_vr(struct page *page)
+{
+ return xe_pagemap_to_vr(xe_page_to_pagemap(page));
+}
+
+static u64 xe_page_to_dpa(struct page *page)
+{
+ struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
+ u64 hpa_base = xpagemap->hpa_base;
u64 pfn = page_to_pfn(page);
u64 offset;
+ u64 dpa;
xe_assert(vr->xe, is_device_private_page(page));
- xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base);
+ xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
- offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
+ offset = (pfn << PAGE_SHIFT) - hpa_base;
dpa = vr->dpa_base + offset;
return dpa;
@@ -514,11 +531,11 @@ static int xe_svm_copy(struct page **pages,
continue;
if (!vr && spage) {
- vr = page_to_vr(spage);
+ vr = xe_page_to_vr(spage);
gt = xe_migrate_exec_queue(vr->migrate)->gt;
xe = vr->xe;
}
- XE_WARN_ON(spage && page_to_vr(spage) != vr);
+ XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);
/*
* CPU page and device page valid, capture physical address on
@@ -526,7 +543,7 @@ static int xe_svm_copy(struct page **pages,
* device pages.
*/
if (pagemap_addr[i].addr && spage) {
- __vram_addr = xe_vram_region_page_to_dpa(vr, spage);
+ __vram_addr = xe_page_to_dpa(spage);
if (vram_addr == XE_VRAM_ADDR_INVALID) {
vram_addr = __vram_addr;
pos = i;
@@ -672,9 +689,11 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
xe_pm_runtime_put(xe);
}
-static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
+static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
{
- return PHYS_PFN(offset + vr->hpa_base);
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+
+ return PHYS_PFN(offset + xpagemap->hpa_base);
}
static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
@@ -694,7 +713,8 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
list_for_each_entry(block, blocks, link) {
struct xe_vram_region *vr = block->private;
struct drm_buddy *buddy = vram_to_buddy(vr);
- u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
+ u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
+ drm_buddy_block_offset(block));
int i;
for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
@@ -711,6 +731,11 @@ static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
.copy_to_ram = xe_svm_copy_to_ram,
};
+#else
+static int xe_svm_get_pagemaps(struct xe_vm *vm)
+{
+ return 0;
+}
#endif
static const struct drm_gpusvm_ops gpusvm_ops = {
@@ -725,6 +750,26 @@ static const unsigned long fault_chunk_sizes[] = {
SZ_4K,
};
+static void xe_pagemap_put(struct xe_pagemap *xpagemap)
+{
+ drm_pagemap_put(&xpagemap->dpagemap);
+}
+
+static void xe_svm_put_pagemaps(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];
+
+ if (xpagemap)
+ xe_pagemap_put(xpagemap);
+ vm->svm.pagemaps[id] = NULL;
+ }
+}
+
/**
* xe_svm_init() - SVM initialize
* @vm: The VM.
@@ -743,12 +788,21 @@ int xe_svm_init(struct xe_vm *vm)
INIT_WORK(&vm->svm.garbage_collector.work,
xe_svm_garbage_collector_work_func);
+ err = xe_svm_get_pagemaps(vm);
+ if (err)
+ return err;
+
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
current->mm, 0, vm->size,
xe_modparam.svm_notifier_size * SZ_1M,
&gpusvm_ops, fault_chunk_sizes,
ARRAY_SIZE(fault_chunk_sizes));
drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+
+ if (err) {
+ xe_svm_put_pagemaps(vm);
+ return err;
+ }
} else {
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
&vm->xe->drm, NULL, 0, 0, 0, NULL,
@@ -768,6 +822,7 @@ void xe_svm_close(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
flush_work(&vm->svm.garbage_collector.work);
+ xe_svm_put_pagemaps(vm);
}
/**
@@ -861,7 +916,8 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct mm_struct *mm,
unsigned long timeslice_ms)
{
- struct xe_vram_region *vr = container_of(dpagemap->pagemap, typeof(*vr), pagemap);
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
struct drm_buddy_block *block;
@@ -1370,11 +1426,6 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
-static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
-{
- return tile->mem.vram->dpagemap;
-}
-
/**
* xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
* @vma: Pointer to the xe_vma structure containing memory attributes
@@ -1400,7 +1451,7 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
return NULL;
if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
- return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL;
+ return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
/* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
return NULL;
@@ -1423,7 +1474,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
- dpagemap = tile_local_pagemap(tile);
+ dpagemap = xe_tile_local_pagemap(tile);
return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
xe_svm_range_end(range),
range->base.gpusvm->mm,
@@ -1442,7 +1493,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
dma_addr_t addr;
if (pgmap_dev == dev) {
- addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
+ addr = xe_page_to_dpa(page);
prot = XE_INTERCONNECT_VRAM;
} else {
addr = DMA_MAPPING_ERROR;
@@ -1452,94 +1503,243 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
return drm_pagemap_addr_encode(addr, prot, order, dir);
}
-static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
- .device_map = xe_drm_pagemap_device_map,
- .populate_mm = xe_drm_pagemap_populate_mm,
-};
+static void xe_pagemap_destroy_work(struct work_struct *work)
+{
+ struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
+ struct dev_pagemap *pagemap = &xpagemap->pagemap;
+ struct drm_device *drm = xpagemap->dpagemap.drm;
+ int idx;
-static void xe_devm_release(void *data)
+ /*
+ * Only unmap / release if devm_ release hasn't run yet.
+ * Otherwise the devm_ callbacks have already released, or
+ * will do shortly.
+ */
+ if (drm_dev_enter(drm, &idx)) {
+ devm_memunmap_pages(drm->dev, pagemap);
+ devm_release_mem_region(drm->dev, pagemap->range.start,
+ pagemap->range.end - pagemap->range.start + 1);
+ drm_dev_exit(idx);
+ }
+ kfree(xpagemap);
+}
+
+static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim)
{
- struct xe_vram_region *vr = data;
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+ struct xe_device *xe = to_xe_device(dpagemap->drm);
- drm_pagemap_put(vr->dpagemap);
- vr->dpagemap = NULL;
+ if (from_atomic_or_reclaim)
+ queue_work(xe->destroy_wq, &xpagemap->destroy_work);
+ else
+ xe_pagemap_destroy_work(&xpagemap->destroy_work);
}
+static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
+ .device_map = xe_drm_pagemap_device_map,
+ .populate_mm = xe_drm_pagemap_populate_mm,
+ .destroy = xe_pagemap_destroy,
+};
+
/**
- * xe_devm_add: Remap and provide memmap backing for device memory
- * @tile: tile that the memory region belongs to
- * @vr: vram memory region to remap
+ * xe_pagemap_create() - Create a struct xe_pagemap object
+ * @xe: The xe device.
+ * @vr: Back-pointer to the struct xe_vram_region.
*
- * This remap device memory to host physical address space and create
- * struct page to back device memory
+ * Allocate and initialize a struct xe_pagemap. On successful
+ * return, drm_pagemap_put() on the embedded struct drm_pagemap
+ * should be used to unreference.
*
- * Return: 0 on success standard error code otherwise
+ * Return: Pointer to a struct xe_pagemap if successful. Error pointer
+ * on failure.
*/
-int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
+static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr)
{
- struct xe_device *xe = tile_to_xe(tile);
- struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
+ struct device *dev = xe->drm.dev;
+ struct xe_pagemap *xpagemap;
+ struct dev_pagemap *pagemap;
+ struct drm_pagemap *dpagemap;
struct resource *res;
void *addr;
- int ret;
+ int err;
+
+ xpagemap = kzalloc(sizeof(*xpagemap), GFP_KERNEL);
+ if (!xpagemap)
+ return ERR_PTR(-ENOMEM);
+
+ pagemap = &xpagemap->pagemap;
+ dpagemap = &xpagemap->dpagemap;
+ INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
+ xpagemap->vr = vr;
+
+ err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
+ if (err)
+ goto out_no_dpagemap;
res = devm_request_free_mem_region(dev, &iomem_resource,
vr->usable_size);
if (IS_ERR(res)) {
- ret = PTR_ERR(res);
- return ret;
+ err = PTR_ERR(res);
+ goto out_err;
}
- vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
- &xe_drm_pagemap_ops);
- if (IS_ERR(vr->dpagemap)) {
- drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
- tile->id, vr->dpagemap);
- ret = PTR_ERR(vr->dpagemap);
- goto out_no_dpagemap;
+ pagemap->type = MEMORY_DEVICE_PRIVATE;
+ pagemap->range.start = res->start;
+ pagemap->range.end = res->end;
+ pagemap->nr_range = 1;
+ pagemap->owner = xe_svm_devm_owner(xe);
+ pagemap->ops = drm_pagemap_pagemap_ops_get();
+ addr = devm_memremap_pages(dev, pagemap);
+ if (IS_ERR(addr)) {
+ err = PTR_ERR(addr);
+ devm_release_mem_region(dev, res->start, res->end - res->start + 1);
+ goto out_err;
}
- ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
- if (ret)
- goto out_no_dpagemap;
+ xpagemap->hpa_base = res->start;
+ return xpagemap;
- vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
- vr->pagemap.range.start = res->start;
- vr->pagemap.range.end = res->end;
- vr->pagemap.nr_range = 1;
- vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
- vr->pagemap.owner = xe_svm_devm_owner(xe);
- addr = devm_memremap_pages(dev, &vr->pagemap);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
- tile->id, ERR_PTR(ret));
- goto out_failed_memremap;
+out_err:
+ drm_pagemap_put(dpagemap);
+ return ERR_PTR(err);
+
+out_no_dpagemap:
+ kfree(xpagemap);
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap
+ * @xe: The xe device.
+ * @cache: The struct drm_pagemap_cache.
+ * @vr: The VRAM region.
+ *
+ * Check if there is an already used xe_pagemap for this tile, and in that case,
+ * return it.
+ * If not, check if there is a cached xe_pagemap for this tile, and in that case,
+ * cancel its destruction, re-initialize it and return it.
+ * Finally if there is no cached or already used pagemap, create one and
+ * register it in the tile's pagemap cache.
+ *
+ * Note that this function is typically called from within an IOCTL, and waits are
+ * therefore carried out interruptible if possible.
+ *
+ * Return: A pointer to a struct xe_pagemap if successful. Error pointer on failure.
+ */
+static struct xe_pagemap *
+xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache,
+ struct xe_vram_region *vr)
+{
+ struct drm_pagemap *dpagemap;
+ struct xe_pagemap *xpagemap;
+ int err;
+
+ err = drm_pagemap_cache_lock_lookup(cache);
+ if (err)
+ return ERR_PTR(err);
+
+ dpagemap = drm_pagemap_get_from_cache(cache);
+ if (IS_ERR(dpagemap)) {
+ xpagemap = ERR_CAST(dpagemap);
+ } else if (!dpagemap) {
+ xpagemap = xe_pagemap_create(xe, vr);
+ if (IS_ERR(xpagemap))
+ goto out_unlock;
+ drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap);
+ } else {
+ xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+ }
+
+out_unlock:
+ drm_pagemap_cache_unlock_lookup(cache);
+ return xpagemap;
+}
+
+static int xe_svm_get_pagemaps(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_pagemap *xpagemap;
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_vram_region *vr;
+
+ if (!((BIT(id) << 1) & xe->info.mem_region_mask))
+ continue;
+
+ vr = xe_tile_to_vr(tile);
+ xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
+ if (IS_ERR(xpagemap))
+ break;
+ vm->svm.pagemaps[id] = xpagemap;
+ }
+
+ if (IS_ERR(xpagemap)) {
+ xe_svm_put_pagemaps(vm);
+ return PTR_ERR(xpagemap);
}
- vr->hpa_base = res->start;
- drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
- tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
return 0;
+}
-out_failed_memremap:
- drm_pagemap_put(vr->dpagemap);
-out_no_dpagemap:
- devm_release_mem_region(dev, res->start, resource_size(res));
- return ret;
+/**
+ * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker
+ * @xe: The xe device
+ *
+ * Create a drm_pagemap shrinker and register with the xe device.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_pagemap_shrinker_create(struct xe_device *xe)
+{
+ xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm);
+ return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker);
}
+
+/**
+ * xe_pagemap_cache_create() - Create a drm_pagemap cache
+ * @tile: The tile to register the cache with
+ *
+ * Create a drm_pagemap cache and register with the tile.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_pagemap_cache_create(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+
+ if (IS_DGFX(xe)) {
+ struct drm_pagemap_cache *cache =
+ drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);
+
+ if (IS_ERR(cache))
+ return PTR_ERR(cache);
+
+ tile->mem.vram->dpagemap_cache = cache;
+ }
+
+ return 0;
+}
+
#else
-int xe_svm_alloc_vram(struct xe_tile *tile,
- struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+
+int xe_pagemap_shrinker_create(struct xe_device *xe)
{
- return -EOPNOTSUPP;
+ return 0;
}
-int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
+int xe_pagemap_cache_create(struct xe_tile *tile)
{
return 0;
}
+int xe_svm_alloc_vram(struct xe_tile *tile,
+ struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
return NULL;
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index fa757dd07954..8a49ff17ef0c 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -27,8 +27,13 @@ static inline void *xe_svm_devm_owner(struct xe_device *xe)
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
+struct drm_device;
+struct drm_file;
+
struct xe_bo;
struct xe_gt;
+struct xe_device;
+struct xe_vram_region;
struct xe_tile;
struct xe_vm;
struct xe_vma;
@@ -55,6 +60,22 @@ struct xe_svm_range {
u8 tile_invalidated;
};
+/**
+ * struct xe_pagemap - Manages xe device_private memory for SVM.
+ * @pagemap: The struct dev_pagemap providing the struct pages.
+ * @dpagemap: The drm_pagemap managing allocation and migration.
+ * @destroy_work: Handles asynchronous destruction and caching.
+ * @hpa_base: The host physical address base for the managed memory.
+ * @vr: Backpointer to the xe_vram region.
+ */
+struct xe_pagemap {
+ struct dev_pagemap pagemap;
+ struct drm_pagemap dpagemap;
+ struct work_struct destroy_work;
+ resource_size_t hpa_base;
+ struct xe_vram_region *vr;
+};
+
/**
* xe_svm_range_pages_valid() - SVM range pages valid
* @range: SVM range
@@ -171,6 +192,10 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
void xe_svm_flush(struct xe_vm *vm);
+int xe_pagemap_shrinker_create(struct xe_device *xe);
+
+int xe_pagemap_cache_create(struct xe_tile *tile);
+
#else
#include <linux/interval_tree.h>
#include "xe_vm.h"
@@ -179,7 +204,7 @@ struct drm_pagemap_addr;
struct drm_gpusvm_ctx;
struct drm_gpusvm_range;
struct xe_bo;
-struct xe_gt;
+struct xe_device;
struct xe_vm;
struct xe_vma;
struct xe_tile;
@@ -346,6 +371,17 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
static inline void xe_svm_flush(struct xe_vm *vm)
{
}
+
+static inline int xe_pagemap_shrinker_create(struct xe_device *xe)
+{
+ return 0;
+}
+
+static inline int xe_pagemap_cache_create(struct xe_tile *tile)
+{
+ return 0;
+}
+
#define xe_svm_range_has_dma_mapping(...) false
#endif /* CONFIG_DRM_XE_GPUSVM */
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 4f4f9a5c43af..051b191377df 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -6,6 +6,7 @@
#include <linux/fault-inject.h>
#include <drm/drm_managed.h>
+#include <drm/drm_pagemap_util.h>
#include "xe_bo.h"
#include "xe_device.h"
@@ -180,17 +181,19 @@ ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
int xe_tile_init_noalloc(struct xe_tile *tile)
{
struct xe_device *xe = tile_to_xe(tile);
+ int err;
xe_wa_apply_tile_workarounds(tile);
- if (xe->info.has_usm && IS_DGFX(xe))
- xe_devm_add(tile, tile->mem.vram);
+ err = xe_pagemap_cache_create(tile);
+ if (err)
+ return err;
if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) {
- int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
-
+ err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
if (err)
return err;
+
xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1;
}
@@ -215,3 +218,26 @@ void xe_tile_migrate_wait(struct xe_tile *tile)
{
xe_migrate_wait(tile->migrate);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+/**
+ * xe_tile_local_pagemap() - Return a pointer to the tile's local drm_pagemap if any
+ * @tile: The tile.
+ *
+ * Return: A pointer to the tile's local drm_pagemap, or NULL if local pagemap
+ * support has been compiled out.
+ */
+struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+ struct drm_pagemap *dpagemap =
+ drm_pagemap_get_from_cache_if_active(xe_tile_to_vr(tile)->dpagemap_cache);
+
+ if (dpagemap) {
+ xe_assert(tile_to_xe(tile), kref_read(&dpagemap->ref) >= 2);
+ drm_pagemap_put(dpagemap);
+ }
+
+ return dpagemap;
+}
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index dceb6297aa01..734132eddda5 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -8,6 +8,7 @@
#include "xe_device_types.h"
+struct xe_pagemap;
struct xe_tile;
int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
@@ -23,4 +24,24 @@ static inline bool xe_tile_is_root(struct xe_tile *tile)
return tile->id == 0;
}
+/**
+ * xe_tile_to_vr() - Return the struct xe_vram_region pointer from a
+ * struct xe_tile pointer
+ * @tile: Pointer to the struct xe_tile.
+ *
+ * Return: Pointer to the struct xe_vram_region embedded in *@tile.
+ */
+static inline struct xe_vram_region *xe_tile_to_vr(struct xe_tile *tile)
+{
+ return tile->mem.vram;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile);
+#else
+static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+ return NULL;
+}
+#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index d6e2a0fdd4b3..9f0d8bf1af4f 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -191,6 +191,7 @@ struct xe_vm {
*/
struct work_struct work;
} garbage_collector;
+ struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
} svm;
struct xe_device *xe;
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
index c0d2c5ee8c10..646e3c12ae9f 100644
--- a/drivers/gpu/drm/xe/xe_vram_types.h
+++ b/drivers/gpu/drm/xe/xe_vram_types.h
@@ -66,19 +66,8 @@ struct xe_vram_region {
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
/** @migrate: Back pointer to migrate */
struct xe_migrate *migrate;
- /** @pagemap: Used to remap device memory as ZONE_DEVICE */
- struct dev_pagemap pagemap;
- /**
- * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
- * pages of this tile.
- */
- struct drm_pagemap *dpagemap;
- /**
- * @hpa_base: base host physical address
- *
- * This is generated when remap device memory as ZONE_DEVICE
- */
- resource_size_t hpa_base;
+ /** @dpagemap_cache: drm_pagemap cache. */
+ struct drm_pagemap_cache *dpagemap_cache;
#endif
};
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 05/15] drm/xe: Use the drm_pagemap cache and shrinker
2025-10-25 12:04 ` [PATCH 05/15] drm/xe: Use the " Thomas Hellström
@ 2025-10-30 0:43 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-30 0:43 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:02PM +0200, Thomas Hellström wrote:
> Define a struct xe_pagemap that embeds all pagemap-related
> data used by xekmd, and use the drm_pagemap cache- and
> shrinker to manage lifetime.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.c | 6 +
> drivers/gpu/drm/xe/xe_device_types.h | 5 +
> drivers/gpu/drm/xe/xe_svm.c | 354 +++++++++++++++++++++------
> drivers/gpu/drm/xe/xe_svm.h | 38 ++-
> drivers/gpu/drm/xe/xe_tile.c | 34 ++-
> drivers/gpu/drm/xe/xe_tile.h | 21 ++
> drivers/gpu/drm/xe/xe_vm_types.h | 1 +
> drivers/gpu/drm/xe/xe_vram_types.h | 15 +-
> 8 files changed, 379 insertions(+), 95 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 5f6a412b571c..ad004aab67ce 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -16,6 +16,7 @@
> #include <drm/drm_gem_ttm_helper.h>
> #include <drm/drm_ioctl.h>
> #include <drm/drm_managed.h>
> +#include <drm/drm_pagemap_util.h>
> #include <drm/drm_print.h>
> #include <uapi/drm/xe_drm.h>
>
> @@ -62,6 +63,7 @@
> #include "xe_shrinker.h"
> #include "xe_survivability_mode.h"
> #include "xe_sriov.h"
> +#include "xe_svm.h"
> #include "xe_tile.h"
> #include "xe_ttm_stolen_mgr.h"
> #include "xe_ttm_sys_mgr.h"
> @@ -465,6 +467,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>
> init_rwsem(&xe->usm.lock);
>
> + err = xe_pagemap_shrinker_create(xe);
> + if (err)
> + goto err;
> +
> xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
>
> if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 6a62b520f5b5..dbcfe54b36d8 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -35,6 +35,7 @@
> #endif
>
> struct dram_info;
> +struct drm_pagemap_shrinker;
> struct intel_display;
> struct intel_dg_nvm_dev;
> struct xe_ggtt;
> @@ -418,6 +419,10 @@ struct xe_device {
> u32 next_asid;
> /** @usm.lock: protects UM state */
> struct rw_semaphore lock;
> +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
> + /** @usm.pagemap_shrinker: Shrinker for unused pagemaps */
> + struct drm_pagemap_shrinker *dpagemap_shrinker;
> +#endif
> } usm;
>
> /** @pinned: pinned BO state */
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index f6ee22da2e95..8aced064c83f 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -4,6 +4,9 @@
> */
>
> #include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <drm/drm_pagemap.h>
> +#include <drm/drm_pagemap_util.h>
>
> #include "xe_bo.h"
> #include "xe_exec_queue_types.h"
> @@ -19,6 +22,8 @@
> #include "xe_vm_types.h"
> #include "xe_vram_types.h"
>
> +static int xe_svm_get_pagemaps(struct xe_vm *vm);
> +
> static bool xe_svm_range_in_vram(struct xe_svm_range *range)
> {
> /*
> @@ -395,22 +400,34 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
>
> #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
>
> -static struct xe_vram_region *page_to_vr(struct page *page)
> +static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
> {
> - return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
> + return xpagemap->vr;
> }
>
> -static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
> - struct page *page)
> +static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
> {
> - u64 dpa;
> + return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
> +}
> +
> +static struct xe_vram_region *xe_page_to_vr(struct page *page)
> +{
> + return xe_pagemap_to_vr(xe_page_to_pagemap(page));
> +}
> +
> +static u64 xe_page_to_dpa(struct page *page)
> +{
> + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
> + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> + u64 hpa_base = xpagemap->hpa_base;
> u64 pfn = page_to_pfn(page);
> u64 offset;
> + u64 dpa;
>
> xe_assert(vr->xe, is_device_private_page(page));
> - xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base);
> + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
>
> - offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
> + offset = (pfn << PAGE_SHIFT) - hpa_base;
> dpa = vr->dpa_base + offset;
>
> return dpa;
> @@ -514,11 +531,11 @@ static int xe_svm_copy(struct page **pages,
> continue;
>
> if (!vr && spage) {
> - vr = page_to_vr(spage);
> + vr = xe_page_to_vr(spage);
> gt = xe_migrate_exec_queue(vr->migrate)->gt;
> xe = vr->xe;
> }
> - XE_WARN_ON(spage && page_to_vr(spage) != vr);
> + XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);
>
> /*
> * CPU page and device page valid, capture physical address on
> @@ -526,7 +543,7 @@ static int xe_svm_copy(struct page **pages,
> * device pages.
> */
> if (pagemap_addr[i].addr && spage) {
> - __vram_addr = xe_vram_region_page_to_dpa(vr, spage);
> + __vram_addr = xe_page_to_dpa(spage);
> if (vram_addr == XE_VRAM_ADDR_INVALID) {
> vram_addr = __vram_addr;
> pos = i;
> @@ -672,9 +689,11 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
> xe_pm_runtime_put(xe);
> }
>
> -static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
> +static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
> {
> - return PHYS_PFN(offset + vr->hpa_base);
> + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
> +
> + return PHYS_PFN(offset + xpagemap->hpa_base);
> }
>
> static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
> @@ -694,7 +713,8 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
> list_for_each_entry(block, blocks, link) {
> struct xe_vram_region *vr = block->private;
> struct drm_buddy *buddy = vram_to_buddy(vr);
> - u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
> + u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
> + drm_buddy_block_offset(block));
> int i;
>
> for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
> @@ -711,6 +731,11 @@ static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
> .copy_to_ram = xe_svm_copy_to_ram,
> };
>
> +#else
> +static int xe_svm_get_pagemaps(struct xe_vm *vm)
> +{
> + return 0;
> +}
> #endif
>
> static const struct drm_gpusvm_ops gpusvm_ops = {
> @@ -725,6 +750,26 @@ static const unsigned long fault_chunk_sizes[] = {
> SZ_4K,
> };
>
> +static void xe_pagemap_put(struct xe_pagemap *xpagemap)
> +{
> + drm_pagemap_put(&xpagemap->dpagemap);
> +}
> +
> +static void xe_svm_put_pagemaps(struct xe_vm *vm)
> +{
> + struct xe_device *xe = vm->xe;
> + struct xe_tile *tile;
> + int id;
> +
> + for_each_tile(tile, xe, id) {
> + struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];
> +
> + if (xpagemap)
> + xe_pagemap_put(xpagemap);
> + vm->svm.pagemaps[id] = NULL;
> + }
> +}
> +
> /**
> * xe_svm_init() - SVM initialize
> * @vm: The VM.
> @@ -743,12 +788,21 @@ int xe_svm_init(struct xe_vm *vm)
> INIT_WORK(&vm->svm.garbage_collector.work,
> xe_svm_garbage_collector_work_func);
>
> + err = xe_svm_get_pagemaps(vm);
> + if (err)
> + return err;
> +
> err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
> current->mm, 0, vm->size,
> xe_modparam.svm_notifier_size * SZ_1M,
> &gpusvm_ops, fault_chunk_sizes,
> ARRAY_SIZE(fault_chunk_sizes));
> drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
> +
> + if (err) {
> + xe_svm_put_pagemaps(vm);
> + return err;
> + }
> } else {
> err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
> &vm->xe->drm, NULL, 0, 0, 0, NULL,
> @@ -768,6 +822,7 @@ void xe_svm_close(struct xe_vm *vm)
> {
> xe_assert(vm->xe, xe_vm_is_closed(vm));
> flush_work(&vm->svm.garbage_collector.work);
> + xe_svm_put_pagemaps(vm);
> }
>
> /**
> @@ -861,7 +916,8 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> struct mm_struct *mm,
> unsigned long timeslice_ms)
> {
> - struct xe_vram_region *vr = container_of(dpagemap->pagemap, typeof(*vr), pagemap);
> + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
> + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> struct xe_device *xe = vr->xe;
> struct device *dev = xe->drm.dev;
> struct drm_buddy_block *block;
> @@ -1370,11 +1426,6 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
>
> #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
>
> -static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
> -{
> - return tile->mem.vram->dpagemap;
> -}
> -
> /**
> * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
> * @vma: Pointer to the xe_vma structure containing memory attributes
> @@ -1400,7 +1451,7 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> return NULL;
>
> if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
> - return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL;
> + return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
>
> /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
> return NULL;
> @@ -1423,7 +1474,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
> xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
> range_debug(range, "ALLOCATE VRAM");
>
> - dpagemap = tile_local_pagemap(tile);
> + dpagemap = xe_tile_local_pagemap(tile);
> return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
> xe_svm_range_end(range),
> range->base.gpusvm->mm,
> @@ -1442,7 +1493,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
> dma_addr_t addr;
>
> if (pgmap_dev == dev) {
> - addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
> + addr = xe_page_to_dpa(page);
> prot = XE_INTERCONNECT_VRAM;
> } else {
> addr = DMA_MAPPING_ERROR;
> @@ -1452,94 +1503,243 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
> return drm_pagemap_addr_encode(addr, prot, order, dir);
> }
>
> -static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> - .device_map = xe_drm_pagemap_device_map,
> - .populate_mm = xe_drm_pagemap_populate_mm,
> -};
> +static void xe_pagemap_destroy_work(struct work_struct *work)
> +{
> + struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
> + struct dev_pagemap *pagemap = &xpagemap->pagemap;
> + struct drm_device *drm = xpagemap->dpagemap.drm;
> + int idx;
>
> -static void xe_devm_release(void *data)
> + /*
> + * Only unmap / release if devm_ release hasn't run yet.
> + * Otherwise the devm_ callbacks have already released, or
> + * will do shortly.
> + */
> + if (drm_dev_enter(drm, &idx)) {
> + devm_memunmap_pages(drm->dev, pagemap);
> + devm_release_mem_region(drm->dev, pagemap->range.start,
> + pagemap->range.end - pagemap->range.start + 1);
> + drm_dev_exit(idx);
> + }
> + kfree(xpagemap);
> +}
> +
> +static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim)
> {
> - struct xe_vram_region *vr = data;
> + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
> + struct xe_device *xe = to_xe_device(dpagemap->drm);
>
> - drm_pagemap_put(vr->dpagemap);
> - vr->dpagemap = NULL;
> + if (from_atomic_or_reclaim)
> + queue_work(xe->destroy_wq, &xpagemap->destroy_work);
> + else
> + xe_pagemap_destroy_work(&xpagemap->destroy_work);
> }
>
> +static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> + .device_map = xe_drm_pagemap_device_map,
> + .populate_mm = xe_drm_pagemap_populate_mm,
> + .destroy = xe_pagemap_destroy,
> +};
> +
> /**
> - * xe_devm_add: Remap and provide memmap backing for device memory
> - * @tile: tile that the memory region belongs to
> - * @vr: vram memory region to remap
> + * xe_pagemap_create() - Create a struct xe_pagemap object
> + * @xe: The xe device.
> + * @vr: Back-pointer to the struct xe_vram_region.
> *
> - * This remap device memory to host physical address space and create
> - * struct page to back device memory
> + * Allocate and initialize a struct xe_pagemap. On successful
> + * return, drm_pagemap_put() on the embedded struct drm_pagemap
> + * should be used to unreference.
> *
> - * Return: 0 on success standard error code otherwise
> + * Return: Pointer to a struct xe_pagemap if successful. Error pointer
> + * on failure.
> */
> -int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> +static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr)
> {
> - struct xe_device *xe = tile_to_xe(tile);
> - struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
> + struct device *dev = xe->drm.dev;
> + struct xe_pagemap *xpagemap;
> + struct dev_pagemap *pagemap;
> + struct drm_pagemap *dpagemap;
> struct resource *res;
> void *addr;
> - int ret;
> + int err;
> +
> + xpagemap = kzalloc(sizeof(*xpagemap), GFP_KERNEL);
> + if (!xpagemap)
> + return ERR_PTR(-ENOMEM);
> +
> + pagemap = &xpagemap->pagemap;
> + dpagemap = &xpagemap->dpagemap;
> + INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
> + xpagemap->vr = vr;
> +
> + err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
> + if (err)
> + goto out_no_dpagemap;
>
> res = devm_request_free_mem_region(dev, &iomem_resource,
> vr->usable_size);
> if (IS_ERR(res)) {
> - ret = PTR_ERR(res);
> - return ret;
> + err = PTR_ERR(res);
> + goto out_err;
> }
>
> - vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
> - &xe_drm_pagemap_ops);
> - if (IS_ERR(vr->dpagemap)) {
> - drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
> - tile->id, vr->dpagemap);
> - ret = PTR_ERR(vr->dpagemap);
> - goto out_no_dpagemap;
> + pagemap->type = MEMORY_DEVICE_PRIVATE;
> + pagemap->range.start = res->start;
> + pagemap->range.end = res->end;
> + pagemap->nr_range = 1;
> + pagemap->owner = xe_svm_devm_owner(xe);
> + pagemap->ops = drm_pagemap_pagemap_ops_get();
> + addr = devm_memremap_pages(dev, pagemap);
> + if (IS_ERR(addr)) {
> + err = PTR_ERR(addr);
> + devm_release_mem_region(dev, res->start, res->end - res->start + 1);
> + goto out_err;
> }
> - ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
> - if (ret)
> - goto out_no_dpagemap;
> + xpagemap->hpa_base = res->start;
> + return xpagemap;
>
> - vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> - vr->pagemap.range.start = res->start;
> - vr->pagemap.range.end = res->end;
> - vr->pagemap.nr_range = 1;
> - vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
> - vr->pagemap.owner = xe_svm_devm_owner(xe);
> - addr = devm_memremap_pages(dev, &vr->pagemap);
> - if (IS_ERR(addr)) {
> - ret = PTR_ERR(addr);
> - drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
> - tile->id, ERR_PTR(ret));
> - goto out_failed_memremap;
> +out_err:
> + drm_pagemap_put(dpagemap);
> + return ERR_PTR(err);
> +
> +out_no_dpagemap:
> + kfree(xpagemap);
> + return ERR_PTR(err);
> +}
> +
> +/**
> + * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap
> + * @xe: The xe device.
> + * @cache: The struct drm_pagemap_cache.
> + * @vr: The VRAM region.
> + *
> + * Check if there is an already used xe_pagemap for this tile, and in that case,
> + * return it.
> + * If not, check if there is a cached xe_pagemap for this tile, and in that case,
> + * cancel its destruction, re-initialize it and return it.
> + * Finally if there is no cached or already used pagemap, create one and
> + * register it in the tile's pagemap cache.
> + *
> + * Note that this function is typically called from within an IOCTL, and waits are
> + * therefore carried out interruptible if possible.
> + *
> + * Return: A pointer to a struct xe_pagemap if successful. Error pointer on failure.
> + */
I'd mention that not only is a xe_pagemap returned, but also a reference
that must eventually be dropped.
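Maybe something along these lines (wording suggestion only):

	 * Return: A pointer to a struct xe_pagemap on success, with a
	 * reference held that the caller must eventually drop using
	 * xe_pagemap_put(). Error pointer on failure.
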
> +static struct xe_pagemap *
> +xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache,
> + struct xe_vram_region *vr)
> +{
> + struct drm_pagemap *dpagemap;
> + struct xe_pagemap *xpagemap;
> + int err;
> +
> + err = drm_pagemap_cache_lock_lookup(cache);
> + if (err)
> + return ERR_PTR(err);
> +
> + dpagemap = drm_pagemap_get_from_cache(cache);
> + if (IS_ERR(dpagemap)) {
> + xpagemap = ERR_CAST(dpagemap);
> + } else if (!dpagemap) {
> + xpagemap = xe_pagemap_create(xe, vr);
> + if (IS_ERR(xpagemap))
> + goto out_unlock;
> + drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap);
> + } else {
> + xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
> + }
> +
> +out_unlock:
> + drm_pagemap_cache_unlock_lookup(cache);
> + return xpagemap;
> +}
> +
> +static int xe_svm_get_pagemaps(struct xe_vm *vm)
> +{
> + struct xe_device *xe = vm->xe;
> + struct xe_pagemap *xpagemap;
> + struct xe_tile *tile;
> + int id;
> +
> + for_each_tile(tile, xe, id) {
> + struct xe_vram_region *vr;
> +
> + if (!((BIT(id) << 1) & xe->info.mem_region_mask))
> + continue;
> +
> + vr = xe_tile_to_vr(tile);
> + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
> + if (IS_ERR(xpagemap))
> + break;
> + vm->svm.pagemaps[id] = xpagemap;
> + }
> +
> + if (IS_ERR(xpagemap)) {
> + xe_svm_put_pagemaps(vm);
> + return PTR_ERR(xpagemap);
> }
> - vr->hpa_base = res->start;
>
> - drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
> - tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
> return 0;
> +}
>
> -out_failed_memremap:
> - drm_pagemap_put(vr->dpagemap);
> -out_no_dpagemap:
> - devm_release_mem_region(dev, res->start, resource_size(res));
> - return ret;
> +/**
> + * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker
> + * @xe: The xe device
> + *
> + * Create a drm_pagemap shrinker and register with the xe device.
> + *
> + * Return: %0 on success, negative error code on failure.
> + */
> +int xe_pagemap_shrinker_create(struct xe_device *xe)
> +{
> + xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm);
> + return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker);
> }
> +
> +/**
> + * xe_pagemap_cache_create() - Create a drm_pagemap cache
> + * @tile: The tile to register the cache with
> + *
> + * Create a drm_pagemap cache and register with the tile.
> + *
> + * Return: %0 on success, negative error code on failure.
> + */
> +int xe_pagemap_cache_create(struct xe_tile *tile)
> +{
> + struct xe_device *xe = tile_to_xe(tile);
> +
> + if (IS_DGFX(xe)) {
I think we need to skip this step if vram->dpagemap_cache is already
set. IIRC, some patches were merged allowing tile->vram to be the same
object across multiple tiles. No current platforms use this mode, but
it’s forward-looking for potential upcoming platforms.
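Something like this, maybe (untested sketch; assumes dpagemap_cache stays
NULL until the first tile sharing the vram object has initialized it):

	if (IS_DGFX(xe) && !tile->mem.vram->dpagemap_cache) {
		struct drm_pagemap_cache *cache =
			drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);

		if (IS_ERR(cache))
			return PTR_ERR(cache);

		tile->mem.vram->dpagemap_cache = cache;
	}
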
> + struct drm_pagemap_cache *cache =
> + drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);
> +
> + if (IS_ERR(cache))
> + return PTR_ERR(cache);
> +
> + tile->mem.vram->dpagemap_cache = cache;
> + }
> +
> + return 0;
> +}
> +
> #else
> -int xe_svm_alloc_vram(struct xe_tile *tile,
> - struct xe_svm_range *range,
> - const struct drm_gpusvm_ctx *ctx)
> +
> +int xe_pagemap_shrinker_create(struct xe_device *xe)
> {
> - return -EOPNOTSUPP;
> + return 0;
> }
>
> -int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
> +int xe_pagemap_cache_create(struct xe_tile *tile)
> {
> return 0;
> }
>
> +int xe_svm_alloc_vram(struct xe_tile *tile,
> + struct xe_svm_range *range,
> + const struct drm_gpusvm_ctx *ctx)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
> {
> return NULL;
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index fa757dd07954..8a49ff17ef0c 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -27,8 +27,13 @@ static inline void *xe_svm_devm_owner(struct xe_device *xe)
>
> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
>
> +struct drm_device;
> +struct drm_file;
> +
> struct xe_bo;
> struct xe_gt;
> +struct xe_device;
Alphabetical order.
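i.e. something like:

	struct xe_bo;
	struct xe_device;
	struct xe_gt;
	struct xe_tile;
	struct xe_vm;
	struct xe_vma;
	struct xe_vram_region;
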
> +struct xe_vram_region;
> struct xe_tile;
> struct xe_vm;
> struct xe_vma;
> @@ -55,6 +60,22 @@ struct xe_svm_range {
> u8 tile_invalidated;
> };
>
> +/**
> + * struct xe_pagemap - Manages xe device_private memory for SVM.
> + * @pagemap: The struct dev_pagemap providing the struct pages.
> + * @dpagemap: The drm_pagemap managing allocation and migration.
> + * @destroy_work: Handles asynchronous destruction and caching.
> + * @hpa_base: The host physical address base for the managed memory.
> + * @vr: Backpointer to the xe_vram region.
> + */
> +struct xe_pagemap {
> + struct dev_pagemap pagemap;
> + struct drm_pagemap dpagemap;
> + struct work_struct destroy_work;
> + resource_size_t hpa_base;
> + struct xe_vram_region *vr;
> +};
> +
> /**
> * xe_svm_range_pages_valid() - SVM range pages valid
> * @range: SVM range
> @@ -171,6 +192,10 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
>
> void xe_svm_flush(struct xe_vm *vm);
>
> +int xe_pagemap_shrinker_create(struct xe_device *xe);
> +
> +int xe_pagemap_cache_create(struct xe_tile *tile);
> +
> #else
> #include <linux/interval_tree.h>
> #include "xe_vm.h"
> @@ -179,7 +204,7 @@ struct drm_pagemap_addr;
> struct drm_gpusvm_ctx;
> struct drm_gpusvm_range;
> struct xe_bo;
> -struct xe_gt;
> +struct xe_device;
> struct xe_vm;
> struct xe_vma;
> struct xe_tile;
> @@ -346,6 +371,17 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> static inline void xe_svm_flush(struct xe_vm *vm)
> {
> }
> +
> +static inline int xe_pagemap_shrinker_create(struct xe_device *xe)
> +{
> + return 0;
> +}
> +
> +static inline int xe_pagemap_cache_create(struct xe_tile *tile)
> +{
> + return 0;
> +}
> +
> #define xe_svm_range_has_dma_mapping(...) false
> #endif /* CONFIG_DRM_XE_GPUSVM */
>
> diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
> index 4f4f9a5c43af..051b191377df 100644
> --- a/drivers/gpu/drm/xe/xe_tile.c
> +++ b/drivers/gpu/drm/xe/xe_tile.c
> @@ -6,6 +6,7 @@
> #include <linux/fault-inject.h>
>
> #include <drm/drm_managed.h>
> +#include <drm/drm_pagemap_util.h>
>
> #include "xe_bo.h"
> #include "xe_device.h"
> @@ -180,17 +181,19 @@ ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
> int xe_tile_init_noalloc(struct xe_tile *tile)
> {
> struct xe_device *xe = tile_to_xe(tile);
> + int err;
>
> xe_wa_apply_tile_workarounds(tile);
>
> - if (xe->info.has_usm && IS_DGFX(xe))
> - xe_devm_add(tile, tile->mem.vram);
> + err = xe_pagemap_cache_create(tile);
> + if (err)
> + return err;
>
> if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) {
I think you could potentially move xe_pagemap_cache_create under this logic
too, to avoid double init when tiles point to the same vram object.
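e.g. a rough, untested sketch:

	if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) {
		err = xe_pagemap_cache_create(tile);
		if (err)
			return err;

		err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
		if (err)
			return err;

		xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1;
	}
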
> - int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
> -
> + err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
> if (err)
> return err;
> +
> xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1;
> }
>
> @@ -215,3 +218,26 @@ void xe_tile_migrate_wait(struct xe_tile *tile)
> {
> xe_migrate_wait(tile->migrate);
> }
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
> +/**
> + * xe_tile_local_pagemap() - Return a pointer to the tile's local drm_pagemap if any
> + * @tile: The tile.
> + *
> + * Return: A pointer to the tile's local drm_pagemap, or NULL if local pagemap
> + * support has been compiled out.
> + */
Mention no reference is taken?
> +struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
> +{
> + struct drm_pagemap *dpagemap =
> + drm_pagemap_get_from_cache_if_active(xe_tile_to_vr(tile)->dpagemap_cache);
> +
> + if (dpagemap) {
> + xe_assert(tile_to_xe(tile), kref_read(&dpagemap->ref) >= 2);
> + drm_pagemap_put(dpagemap);
> + }
> +
> + return dpagemap;
> +}
> +#endif
> +
> diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
> index dceb6297aa01..734132eddda5 100644
> --- a/drivers/gpu/drm/xe/xe_tile.h
> +++ b/drivers/gpu/drm/xe/xe_tile.h
> @@ -8,6 +8,7 @@
>
> #include "xe_device_types.h"
>
> +struct xe_pagemap;
> struct xe_tile;
>
> int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
> @@ -23,4 +24,24 @@ static inline bool xe_tile_is_root(struct xe_tile *tile)
> return tile->id == 0;
> }
>
> +/**
> + * xe_tile_to_vr() - Return the struct xe_vram_region pointer from a
> + * struct xe_tile pointer
> + * @tile: Pointer to the struct xe_tile.
> + *
> + * Return: Pointer to the struct xe_vram_region embedded in *@tile.
> + */
> +static inline struct xe_vram_region *xe_tile_to_vr(struct xe_tile *tile)
> +{
> + return tile->mem.vram;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
> +struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile);
> +#else
> +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
> +{
> + return NULL;
> +}
> +#endif
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index d6e2a0fdd4b3..9f0d8bf1af4f 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -191,6 +191,7 @@ struct xe_vm {
> */
> struct work_struct work;
> } garbage_collector;
> + struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
I know this file isn't great at kernel doc, but let's not make it worse
by omitting it.
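Something like (suggestion only, exact wording up to you):

	/** @svm.pagemaps: Per-tile pagemaps referenced by this VM. */
	struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
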
Matt
> } svm;
>
> struct xe_device *xe;
> diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
> index c0d2c5ee8c10..646e3c12ae9f 100644
> --- a/drivers/gpu/drm/xe/xe_vram_types.h
> +++ b/drivers/gpu/drm/xe/xe_vram_types.h
> @@ -66,19 +66,8 @@ struct xe_vram_region {
> #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
> /** @migrate: Back pointer to migrate */
> struct xe_migrate *migrate;
> - /** @pagemap: Used to remap device memory as ZONE_DEVICE */
> - struct dev_pagemap pagemap;
> - /**
> - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
> - * pages of this tile.
> - */
> - struct drm_pagemap *dpagemap;
> - /**
> - * @hpa_base: base host physical address
> - *
> - * This is generated when remap device memory as ZONE_DEVICE
> - */
> - resource_size_t hpa_base;
> + /** @dpagemap_cache: drm_pagemap cache. */
> + struct drm_pagemap_cache *dpagemap_cache;
> #endif
> };
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (4 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 05/15] drm/xe: Use the " Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-29 1:00 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
` (8 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
It is not used anymore.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 30 ------------------------------
1 file changed, 30 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 5ca5b2b53bc1..5a40f67201da 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -681,36 +681,6 @@ int drm_pagemap_init(struct drm_pagemap *dpagemap,
}
EXPORT_SYMBOL(drm_pagemap_init);
-/**
- * drm_pagemap_create() - Create a struct drm_pagemap.
- * @drm: Pointer to a struct drm_device providing the device-private memory.
- * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
- * @ops: Pointer to the struct drm_pagemap_ops.
- *
- * Allocate and initialize a struct drm_pagemap.
- *
- * Return: A refcounted pointer to a struct drm_pagemap on success.
- * Error pointer on error.
- */
-struct drm_pagemap *
-drm_pagemap_create(struct drm_device *drm,
- struct dev_pagemap *pagemap,
- const struct drm_pagemap_ops *ops)
-{
- struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
- int err;
-
- if (!dpagemap)
- return ERR_PTR(-ENOMEM);
-
- err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
- if (err)
- return ERR_PTR(err);
-
- return dpagemap;
-}
-EXPORT_SYMBOL(drm_pagemap_create);
-
/**
* drm_pagemap_put() - Put a struct drm_pagemap reference
* @dpagemap: Pointer to a struct drm_pagemap object.
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface
2025-10-25 12:04 ` [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface Thomas Hellström
@ 2025-10-29 1:00 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 1:00 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:03PM +0200, Thomas Hellström wrote:
> It is not used anymore.
>
Maybe slightly better commit message or just squash into previous patch.
Anyways:
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/drm_pagemap.c | 30 ------------------------------
> 1 file changed, 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
> index 5ca5b2b53bc1..5a40f67201da 100644
> --- a/drivers/gpu/drm/drm_pagemap.c
> +++ b/drivers/gpu/drm/drm_pagemap.c
> @@ -681,36 +681,6 @@ int drm_pagemap_init(struct drm_pagemap *dpagemap,
> }
> EXPORT_SYMBOL(drm_pagemap_init);
>
> -/**
> - * drm_pagemap_create() - Create a struct drm_pagemap.
> - * @drm: Pointer to a struct drm_device providing the device-private memory.
> - * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
> - * @ops: Pointer to the struct drm_pagemap_ops.
> - *
> - * Allocate and initialize a struct drm_pagemap.
> - *
> - * Return: A refcounted pointer to a struct drm_pagemap on success.
> - * Error pointer on error.
> - */
> -struct drm_pagemap *
> -drm_pagemap_create(struct drm_device *drm,
> - struct dev_pagemap *pagemap,
> - const struct drm_pagemap_ops *ops)
> -{
> - struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
> - int err;
> -
> - if (!dpagemap)
> - return ERR_PTR(-ENOMEM);
> -
> - err = drm_pagemap_init(dpagemap, pagemap, drm, ops);
> - if (err)
> - return ERR_PTR(err);
> -
> - return dpagemap;
> -}
> -EXPORT_SYMBOL(drm_pagemap_create);
> -
> /**
> * drm_pagemap_put() - Put a struct drm_pagemap reference
> * @dpagemap: Pointer to a struct drm_pagemap object.
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (5 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-29 1:21 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
` (7 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
The hmm_range_fault() and the migration helpers currently need a common
"owner" to identify pagemaps and clients with fast interconnect.
Add a drm_pagemap utility to set up such owners by registering
drm_pagemaps in a registry and, for each new drm_pagemap, querying
which existing drm_pagemaps have fast interconnects with it.
The "owner" scheme is limited in that it is static at drm_pagemap creation.
Ideally one would want the owner to be adjusted at run-time, but that
requires changes to hmm. If the proposed scheme becomes too limited,
we need to revisit.
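
For illustration, driver-side usage is expected to look roughly like the
sketch below; only the drm_pagemap_* types and helpers are provided by
this patch, the my_* names are made up:

  static DRM_PAGEMAP_OWNER_LIST_DEFINE(my_owner_list);

  static bool my_has_interconnect(struct drm_pagemap_peer *peer1,
                                  struct drm_pagemap_peer *peer2)
  {
          /* Decide based on the devices behind the peers' private pointers. */
          return my_devices_have_fast_link(peer1->private, peer2->private);
  }

  /* When creating a pagemap: */
  peer->private = my_dev;
  err = drm_pagemap_acquire_owner(peer, &my_owner_list,
                                  my_has_interconnect);
  if (err)
          return err;
  /* peer->owner can now be used as the common hmm / migration owner. */

  /* When removing the pagemap: */
  drm_pagemap_release_owner(peer);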
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap_util.c | 118 +++++++++++++++++++++++++++++
include/drm/drm_pagemap_util.h | 53 +++++++++++++
2 files changed, 171 insertions(+)
diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
index e1a1d6bf25f4..dd573b620157 100644
--- a/drivers/gpu/drm/drm_pagemap_util.c
+++ b/drivers/gpu/drm/drm_pagemap_util.c
@@ -3,6 +3,8 @@
* Copyright © 2025 Intel Corporation
*/
+#include <linux/slab.h>
+
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_pagemap.h>
@@ -424,3 +426,119 @@ struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device
return shrinker;
}
EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
+
+/**
+ * struct drm_pagemap_owner - Device interconnect group
+ * @kref: Reference count.
+ *
+ * A struct drm_pagemap_owner identifies a device interconnect group.
+ */
+struct drm_pagemap_owner {
+ struct kref kref;
+};
+
+static void drm_pagemap_owner_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct drm_pagemap_owner, kref));
+}
+
+/**
+ * drm_pagemap_release_owner() - Stop participating in an interconnect group
+ * @peer: Pointer to the struct drm_pagemap_peer used when joining the group
+ *
+ * Stop participating in an interconnect group. This function is typically
+ * called when a pagemap is removed to indicate that it doesn't need to
+ * be taken into account.
+ */
+void drm_pagemap_release_owner(struct drm_pagemap_peer *peer)
+{
+ struct drm_pagemap_owner_list *owner_list = peer->list;
+
+ if (!owner_list)
+ return;
+
+ mutex_lock(&owner_list->lock);
+ list_del(&peer->link);
+ kref_put(&peer->owner->kref, drm_pagemap_owner_release);
+ peer->owner = NULL;
+ mutex_unlock(&owner_list->lock);
+}
+EXPORT_SYMBOL(drm_pagemap_release_owner);
+
+/**
+ * typedef interconnect_fn - Callback function to identify fast interconnects
+ * @peer1: First endpoint.
+ * @peer2: Second endpoint.
+ *
+ * The function returns %true iff @peer1 and @peer2 have a fast interconnect.
+ * Note that this is symmetrical. The function has no notion of client and provider,
+ * which may not be sufficient in some cases. However, since the callback is intended
+ * to guide in providing common pagemap owners, the notion of a common owner to
+ * indicate fast interconnects would then have to change as well.
+ *
+ * Return: %true iff @peer1 and @peer2 have a fast interconnect. Otherwise @false.
+ */
+typedef bool (*interconnect_fn)(struct drm_pagemap_peer *peer1, struct drm_pagemap_peer *peer2);
+
+/**
+ * drm_pagemap_acquire_owner() - Join an interconnect group
+ * @peer: A struct drm_pagemap_peer keeping track of the device interconnect
+ * @owner_list: Pointer to the owner_list, keeping track of all interconnects
+ * @has_interconnect: Callback function to determine whether two peers have a
+ * fast local interconnect.
+ *
+ * Repeatedly calls @has_interconnect for @peer and other peers on @owner_list to
+ * determine a set of peers for which @peer has a fast interconnect. That set will
+ * have common &struct drm_pagemap_owner, and upon successful return, @peer::owner
+ * will point to that struct, holding a reference, and @peer will be registered in
+ * @owner_list. If @peer doesn't have any fast interconnects to other @peers, a
+ * new unique &struct drm_pagemap_owner will be allocated for it, and that
+ * may be shared with other peers that, at a later point, are determined to have
+ * a fast interconnect with @peer.
+ *
+ * When @peer no longer participates in an interconnect group,
+ * drm_pagemap_release_owner() should be called to drop the reference on the
+ * struct drm_pagemap_owner.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
+ struct drm_pagemap_owner_list *owner_list,
+ interconnect_fn has_interconnect)
+{
+ struct drm_pagemap_peer *cur_peer;
+ struct drm_pagemap_owner *owner = NULL;
+ bool interconnect = false;
+
+ mutex_lock(&owner_list->lock);
+ might_alloc(GFP_KERNEL);
+ list_for_each_entry(cur_peer, &owner_list->peers, link) {
+ if (cur_peer->owner != owner) {
+ if (owner && interconnect)
+ break;
+ owner = cur_peer->owner;
+ interconnect = true;
+ }
+ if (interconnect && !has_interconnect(peer, cur_peer))
+ interconnect = false;
+ }
+
+ if (!interconnect) {
+ owner = kmalloc(sizeof(*owner), GFP_KERNEL);
+ if (!owner) {
+ mutex_unlock(&owner_list->lock);
+ return -ENOMEM;
+ }
+ kref_init(&owner->kref);
+ list_add_tail(&peer->link, &owner_list->peers);
+ } else {
+ kref_get(&owner->kref);
+ list_add_tail(&peer->link, &cur_peer->link);
+ }
+ peer->owner = owner;
+ peer->list = owner_list;
+ mutex_unlock(&owner_list->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_pagemap_acquire_owner);
diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
index 292244d429ee..1889630b8950 100644
--- a/include/drm/drm_pagemap_util.h
+++ b/include/drm/drm_pagemap_util.h
@@ -1,12 +1,58 @@
/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
#ifndef _DRM_PAGEMAP_UTIL_H_
#define _DRM_PAGEMAP_UTIL_H_
+#include <linux/list.h>
+#include <linux/mutex.h>
+
struct drm_device;
struct drm_pagemap;
struct drm_pagemap_cache;
+struct drm_pagemap_owner;
struct drm_pagemap_shrinker;
+/**
+ * struct drm_pagemap_peer - Structure representing a fast interconnect peer
+ * @list: Pointer to a &struct drm_pagemap_owner_list used to keep track of peers
+ * @link: List link for @list's list of peers.
+ * @owner: Pointer to a &struct drm_pagemap_owner, common for a set of peers having
+ * fast interconnects.
+ * @private: Pointer private to the struct embedding this struct.
+ */
+struct drm_pagemap_peer {
+ struct drm_pagemap_owner_list *list;
+ struct list_head link;
+ struct drm_pagemap_owner *owner;
+ void *private;
+};
+
+/**
+ * struct drm_pagemap_owner_list - Keeping track of peers and owners
+ * @peer: List of peers.
+ *
+ * The owner list defines the scope where we identify peers having fast interconnects
+ * and a common owner. Typically a driver has a single global owner list to
+ * keep track of common owners for the driver's pagemaps.
+ */
+struct drm_pagemap_owner_list {
+ /** @lock: Mutex protecting the @peers list. */
+ struct mutex lock;
+ /** @peers: List of peers. */
+ struct list_head peers;
+};
+
+/*
+ * Convenience macro to define an owner list.
+ */
+#define DRM_PAGEMAP_OWNER_LIST_DEFINE(_name) \
+ struct drm_pagemap_owner_list _name = { \
+ .lock = __MUTEX_INITIALIZER(_name.lock), \
+ .peers = LIST_HEAD_INIT(_name.peers) }
+
void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
@@ -22,4 +68,11 @@ struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
+
+void drm_pagemap_release_owner(struct drm_pagemap_peer *peer);
+
+int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
+ struct drm_pagemap_owner_list *owner_list,
+ bool (*has_interconnect)(struct drm_pagemap_peer *peer1,
+ struct drm_pagemap_peer *peer2));
#endif
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus
2025-10-25 12:04 ` [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
@ 2025-10-29 1:21 ` Matthew Brost
2025-10-29 14:52 ` Thomas Hellström
0 siblings, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 1:21 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:04PM +0200, Thomas Hellström wrote:
> The hmm_range_fault() and the migration helpers currently need a common
> "owner" to identify pagemaps and clients with fast interconnect.
> Add a drm_pagemap utility to set up such owners by registering
> drm_pagemaps in a registry and, for each new drm_pagemap, querying
> which existing drm_pagemaps have fast interconnects with it.
>
> The "owner" scheme is limited in that it is static at drm_pagemap creation.
> Ideally one would want the owner to be adjusted at run-time, but that
> requires changes to hmm. If the proposed scheme becomes too limited,
> we need to revisit.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/drm_pagemap_util.c | 118 +++++++++++++++++++++++++++++
> include/drm/drm_pagemap_util.h | 53 +++++++++++++
> 2 files changed, 171 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
> index e1a1d6bf25f4..dd573b620157 100644
> --- a/drivers/gpu/drm/drm_pagemap_util.c
> +++ b/drivers/gpu/drm/drm_pagemap_util.c
> @@ -3,6 +3,8 @@
> * Copyright © 2025 Intel Corporation
> */
>
> +#include <linux/slab.h>
> +
> #include <drm/drm_drv.h>
> #include <drm/drm_managed.h>
> #include <drm/drm_pagemap.h>
> @@ -424,3 +426,119 @@ struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device
> return shrinker;
> }
> EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> +
> +/**
> + * struct drm_pagemap_owner - Device interconnect group
> + * @kref: Reference count.
> + *
> + * A struct drm_pagemap_owner identifies a device interconnect group.
> + */
> +struct drm_pagemap_owner {
> + struct kref kref;
> +};
> +
> +static void drm_pagemap_owner_release(struct kref *kref)
> +{
> + kfree(container_of(kref, struct drm_pagemap_owner, kref));
> +}
> +
> +/**
> + * drm_pagemap_release_owner() - Stop participating in an interconnect group
> + * @peer: Pointer to the struct drm_pagemap_peer used when joining the group
> + *
> + * Stop participating in an interconnect group. This function is typically
> + * called when a pagemap is removed to indicate that it doesn't need to
> + * be taken into account.
> + */
> +void drm_pagemap_release_owner(struct drm_pagemap_peer *peer)
> +{
> + struct drm_pagemap_owner_list *owner_list = peer->list;
> +
> + if (!owner_list)
> + return;
> +
> + mutex_lock(&owner_list->lock);
> + list_del(&peer->link);
> + kref_put(&peer->owner->kref, drm_pagemap_owner_release);
> + peer->owner = NULL;
> + mutex_unlock(&owner_list->lock);
> +}
> +EXPORT_SYMBOL(drm_pagemap_release_owner);
> +
> +/**
> + * typedef interconnect_fn - Callback function to identify fast interconnects
> + * @peer1: First endpoint.
> + * @peer2: Second endpont.
> + *
> + * The function returns %true iff @peer1 and @peer2 have a fast interconnect.
> + * Note that this is symmetrical. The function has no notion of client and provider,
> + * which may not be sufficient in some cases. However, since the callback is intended
> + * to guide in providing common pagemap owners, the notion of a common owner to
> + * indicate fast interconnects would then have to change as well.
> + *
> + * Return: %true iff @peer1 and @peer2 have a fast interconnect. Otherwise @false.
> + */
> +typedef bool (*interconnect_fn)(struct drm_pagemap_peer *peer1, struct drm_pagemap_peer *peer2);
> +
> +/**
> + * drm_pagemap_acquire_owner() - Join an interconnect group
> + * @peer: A struct drm_pagemap_peer keeping track of the device interconnect
> + * @owner_list: Pointer to the owner_list, keeping track of all interconnects
> + * @has_interconnect: Callback function to determine whether two peers have a
> + * fast local interconnect.
> + *
> + * Repeatedly calls @has_interconnect for @peer and other peers on @owner_list to
> + * determine a set of peers for which @peer has a fast interconnect. That set will
> + * have common &struct drm_pagemap_owner, and upon successful return, @peer::owner
> + * will point to that struct, holding a reference, and @peer will be registered in
> + * @owner_list. If @peer doesn't have any fast interconnects to other @peers, a
> + * new unique &struct drm_pagemap_owner will be allocated for it, and that
> + * may be shared with other peers that, at a later point, are determined to have
> + * a fast interconnect with @peer.
> + *
> + * When @peer no longer participates in an interconnect group,
> + * drm_pagemap_release_owner() should be called to drop the reference on the
> + * struct drm_pagemap_owner.
> + *
> + * Return: %0 on success, negative error code on failure.
> + */
> +int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
> + struct drm_pagemap_owner_list *owner_list,
> + interconnect_fn has_interconnect)
> +{
> + struct drm_pagemap_peer *cur_peer;
> + struct drm_pagemap_owner *owner = NULL;
> + bool interconnect = false;
> +
> + mutex_lock(&owner_list->lock);
> + might_alloc(GFP_KERNEL);
> + list_for_each_entry(cur_peer, &owner_list->peers, link) {
> + if (cur_peer->owner != owner) {
> + if (owner && interconnect)
> + break;
> + owner = cur_peer->owner;
> + interconnect = true;
> + }
> + if (interconnect && !has_interconnect(peer, cur_peer))
> + interconnect = false;
> + }
> +
> + if (!interconnect) {
> + owner = kmalloc(sizeof(*owner), GFP_KERNEL);
> + if (!owner) {
> + mutex_unlock(&owner_list->lock);
> + return -ENOMEM;
> + }
> + kref_init(&owner->kref);
> + list_add_tail(&peer->link, &owner_list->peers);
> + } else {
> + kref_get(&owner->kref);
> + list_add_tail(&peer->link, &cur_peer->link);
> + }
> + peer->owner = owner;
> + peer->list = owner_list;
> + mutex_unlock(&owner_list->lock);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(drm_pagemap_acquire_owner);
> diff --git a/include/drm/drm_pagemap_util.h b/include/drm/drm_pagemap_util.h
> index 292244d429ee..1889630b8950 100644
> --- a/include/drm/drm_pagemap_util.h
> +++ b/include/drm/drm_pagemap_util.h
> @@ -1,12 +1,58 @@
> /* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
Nit: The above copyright should be moved to an earlier patch.
> #ifndef _DRM_PAGEMAP_UTIL_H_
> #define _DRM_PAGEMAP_UTIL_H_
>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +
> struct drm_device;
> struct drm_pagemap;
> struct drm_pagemap_cache;
> +struct drm_pagemap_owner;
> struct drm_pagemap_shrinker;
>
> +/**
> + * struct drm_pagemap_peer - Structure representing a fast interconnect peer
> + * @list: Pointer to a &struct drm_pagemap_owner_list used to keep track of peers
> + * @link: List link for @list's list of peers.
> + * @owner: Pointer to a &struct drm_pagemap_owner, common for a set of peers having
> + * fast interconnects.
> + * @private: Pointer private to the struct embedding this struct.
> + */
> +struct drm_pagemap_peer {
> + struct drm_pagemap_owner_list *list;
> + struct list_head link;
> + struct drm_pagemap_owner *owner;
> + void *private;
> +};
> +
> +/**
> + * struct drm_pagemap_owner_list - Keeping track of peers and owners
> + * @peer: List of peers.
> + *
> + * The owner list defines the scope where we identify peers having fast interconnects
> + * and a common owner. Typically a driver has a single global owner list to
> + * keep track of common owners for the driver's pagemaps.
> + */
> +struct drm_pagemap_owner_list {
> + /** @lock: Mutex protecting the @peers list. */
> + struct mutex lock;
> + /** @peers: List of peers. */
> + struct list_head peers;
> +};
> +
> +/*
> + * Convenience macro to define an owner list.
I'd perhaps mention this is typically a static module-level instantiation.
Patch itself lgtm, and makes sense. With that:
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> + */
> +#define DRM_PAGEMAP_OWNER_LIST_DEFINE(_name) \
> + struct drm_pagemap_owner_list _name = { \
> + .lock = __MUTEX_INITIALIZER(_name.lock), \
> + .peers = LIST_HEAD_INIT(_name.peers) }
> +
> void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
>
> int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache);
> @@ -22,4 +68,11 @@ struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
> void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap);
>
> struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache);
> +
> +void drm_pagemap_release_owner(struct drm_pagemap_peer *peer);
> +
> +int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
> + struct drm_pagemap_owner_list *owner_list,
> + bool (*has_interconnect)(struct drm_pagemap_peer *peer1,
> + struct drm_pagemap_peer *peer2));
> #endif
> --
> 2.51.0
>
^ permalink raw reply	[flat|nested] 49+ messages in thread
* Re: [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus
2025-10-29 1:21 ` Matthew Brost
@ 2025-10-29 14:52 ` Thomas Hellström
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-29 14:52 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 18:21 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:04PM +0200, Thomas Hellström wrote:
> > The hmm_range_fault() and the migration helpers currently need a
> > common
> > "owner" to identify pagemaps and clients with fast interconnect.
> > Add a drm_pagemap utility to setup such owners by registering
> > drm_pagemaps, in a registry, and for each new drm_pagemap,
> > query which existing drm_pagemaps have fast interconnects with the
> > new
> > drm_pagemap.
> >
> > The "owner" scheme is limited in that it is static at drm_pagemap
> > creation.
> > Ideally one would want the owner to be adjusted at run-time, but
> > that
> > requires changes to hmm. If the proposed scheme becomes too
> > limited,
> > we need to revisit.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/drm_pagemap_util.c | 118
> > +++++++++++++++++++++++++++++
> > include/drm/drm_pagemap_util.h | 53 +++++++++++++
> > 2 files changed, 171 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/drm_pagemap_util.c
> > b/drivers/gpu/drm/drm_pagemap_util.c
> > index e1a1d6bf25f4..dd573b620157 100644
> > --- a/drivers/gpu/drm/drm_pagemap_util.c
> > +++ b/drivers/gpu/drm/drm_pagemap_util.c
> > @@ -3,6 +3,8 @@
> > * Copyright © 2025 Intel Corporation
> > */
> >
> > +#include <linux/slab.h>
> > +
> > #include <drm/drm_drv.h>
> > #include <drm/drm_managed.h>
> > #include <drm/drm_pagemap.h>
> > @@ -424,3 +426,119 @@ struct drm_pagemap_shrinker
> > *drm_pagemap_shrinker_create_devm(struct drm_device
> > return shrinker;
> > }
> > EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
> > +
> > +/**
> > + * struct drm_pagemap_owner - Device interconnect group
> > + * @kref: Reference count.
> > + *
> > + * A struct drm_pagemap_owner identifies a device interconnect
> > group.
> > + */
> > +struct drm_pagemap_owner {
> > + struct kref kref;
> > +};
> > +
> > +static void drm_pagemap_owner_release(struct kref *kref)
> > +{
> > + kfree(container_of(kref, struct drm_pagemap_owner, kref));
> > +}
> > +
> > +/**
> > + * drm_pagemap_release_owner() - Stop participating in an
> > interconnect group
> > + * @peer: Pointer to the struct drm_pagemap_peer used when joining
> > the group
> > + *
> > + * Stop participating in an interconnect group. This function is
> > typically
> > + * called when a pagemap is removed to indicate that it doesn't
> > need to
> > + * be taken into account.
> > + */
> > +void drm_pagemap_release_owner(struct drm_pagemap_peer *peer)
> > +{
> > + struct drm_pagemap_owner_list *owner_list = peer->list;
> > +
> > + if (!owner_list)
> > + return;
> > +
> > + mutex_lock(&owner_list->lock);
> > + list_del(&peer->link);
> > + kref_put(&peer->owner->kref, drm_pagemap_owner_release);
> > + peer->owner = NULL;
> > + mutex_unlock(&owner_list->lock);
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_release_owner);
> > +
> > +/**
> > + * typedef interconnect_fn - Callback function to identify fast
> > interconnects
> > + * @peer1: First endpoint.
> > + * @peer2: Second endpont.
> > + *
> > + * The function returns %true iff @peer1 and @peer2 have a fast
> > interconnect.
> > + * Note that this is symmetrical. The function has no notion of
> > client and provider,
> > + * which may not be sufficient in some cases. However, since the
> > callback is intended
> > + * to guide in providing common pagemap owners, the notion of a
> > common owner to
> > + * indicate fast interconnects would then have to change as well.
> > + *
> > + * Return: %true iff @peer1 and @peer2 have a fast interconnect.
> > Otherwise @false.
> > + */
> > +typedef bool (*interconnect_fn)(struct drm_pagemap_peer *peer1,
> > struct drm_pagemap_peer *peer2);
> > +
> > +/**
> > + * drm_pagemap_acquire_owner() - Join an interconnect group
> > + * @peer: A struct drm_pagemap_peer keeping track of the device
> > interconnect
> > + * @owner_list: Pointer to the owner_list, keeping track of all
> > interconnects
> > + * @has_interconnect: Callback function to determine whether two
> > peers have a
> > + * fast local interconnect.
> > + *
> > + * Repeatedly calls @has_interconnect for @peer and other peers on
> > @owner_list to
> > + * determine a set of peers for which @peer has a fast
> > interconnect. That set will
> > + * have common &struct drm_pagemap_owner, and upon successful
> > return, @peer::owner
> > + * will point to that struct, holding a reference, and @peer will
> > be registered in
> > + * @owner_list. If @peer doesn't have any fast interconnects to
> > other @peers, a
> > + * new unique &struct drm_pagemap_owner will be allocated for it,
> > and that
> > + * may be shared with other peers that, at a later point, are
> > determined to have
> > + * a fast interconnect with @peer.
> > + *
> > + * When @peer no longer participates in an interconnect group,
> > + * drm_pagemap_release_owner() should be called to drop the
> > reference on the
> > + * struct drm_pagemap_owner.
> > + *
> > + * Return: %0 on success, negative error code on failure.
> > + */
> > +int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
> > + struct drm_pagemap_owner_list
> > *owner_list,
> > + interconnect_fn has_interconnect)
> > +{
> > + struct drm_pagemap_peer *cur_peer;
> > + struct drm_pagemap_owner *owner = NULL;
> > + bool interconnect = false;
> > +
> > + mutex_lock(&owner_list->lock);
> > + might_alloc(GFP_KERNEL);
> > + list_for_each_entry(cur_peer, &owner_list->peers, link) {
> > + if (cur_peer->owner != owner) {
> > + if (owner && interconnect)
> > + break;
> > + owner = cur_peer->owner;
> > + interconnect = true;
> > + }
> > + if (interconnect && !has_interconnect(peer,
> > cur_peer))
> > + interconnect = false;
> > + }
> > +
> > + if (!interconnect) {
> > + owner = kmalloc(sizeof(*owner), GFP_KERNEL);
> > + if (!owner) {
> > + mutex_unlock(&owner_list->lock);
> > + return -ENOMEM;
> > + }
> > + kref_init(&owner->kref);
> > + list_add_tail(&peer->link, &owner_list->peers);
> > + } else {
> > + kref_get(&owner->kref);
> > + list_add_tail(&peer->link, &cur_peer->link);
> > + }
> > + peer->owner = owner;
> > + peer->list = owner_list;
> > + mutex_unlock(&owner_list->lock);
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(drm_pagemap_acquire_owner);
> > diff --git a/include/drm/drm_pagemap_util.h
> > b/include/drm/drm_pagemap_util.h
> > index 292244d429ee..1889630b8950 100644
> > --- a/include/drm/drm_pagemap_util.h
> > +++ b/include/drm/drm_pagemap_util.h
> > @@ -1,12 +1,58 @@
> > /* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2025 Intel Corporation
> > + */
> > +
>
> Nit: The above copyright should be moved to an earlier patch.
OK yes.
>
>
>
> > #ifndef _DRM_PAGEMAP_UTIL_H_
> > #define _DRM_PAGEMAP_UTIL_H_
> >
> > +#include <linux/list.h>
> > +#include <linux/mutex.h>
> > +
> > struct drm_device;
> > struct drm_pagemap;
> > struct drm_pagemap_cache;
> > +struct drm_pagemap_owner;
> > struct drm_pagemap_shrinker;
> >
> > +/**
> > + * struct drm_pagemap_peer - Structure representing a fast
> > interconnect peer
> > + * @list: Pointer to a &struct drm_pagemap_owner_list used to keep
> > track of peers
> > + * @link: List link for @list's list of peers.
> > + * @owner: Pointer to a &struct drm_pagemap_owner, common for a
> > set of peers having
> > + * fast interconnects.
> > + * @private: Pointer private to the struct embedding this struct.
> > + */
> > +struct drm_pagemap_peer {
> > + struct drm_pagemap_owner_list *list;
> > + struct list_head link;
> > + struct drm_pagemap_owner *owner;
> > + void *private;
> > +};
> > +
> > +/**
> > + * struct drm_pagemap_owner_list - Keeping track of peers and
> > owners
> > + * @peer: List of peers.
> > + *
> > + * The owner list defines the scope where we identify peers having
> > fast interconnects
> > + * and a common owner. Typically a driver has a single global
> > owner list to
> > + * keep track of common owners for the driver's pagemaps.
> > + */
> > +struct drm_pagemap_owner_list {
> > + /** @lock: Mutex protecting the @peers list. */
> > + struct mutex lock;
> > + /** @peers: List of peers. */
> > + struct list_head peers;
> > +};
> > +
> > +/*
> > + * Convenience macro to define an owner list.
>
> I'd perhaps mention this typially a static module instantiation.
Sure.
>
> Patch itself lgtm, and makes sense. With that:
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
>
> > + */
> > +#define DRM_PAGEMAP_OWNER_LIST_DEFINE(_name) \
> > + struct drm_pagemap_owner_list _name = { \
> > + .lock = __MUTEX_INITIALIZER(_name.lock), \
> > + .peers = LIST_HEAD_INIT(_name.peers) }
> > +
> > void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap);
> >
> > int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache
> > *cache);
> > @@ -22,4 +68,11 @@ struct drm_pagemap
> > *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache);
> > void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache
> > *cache, struct drm_pagemap *dpagemap);
> >
> > struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct
> > drm_pagemap_cache *cache);
> > +
> > +void drm_pagemap_release_owner(struct drm_pagemap_peer *peer);
> > +
> > +int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
> > + struct drm_pagemap_owner_list
> > *owner_list,
> > + bool (*has_interconnect)(struct
> > drm_pagemap_peer *peer1,
> > + struct
> > drm_pagemap_peer *peer2));
> > #endif
> > --
> > 2.51.0
> >
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (6 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-27 23:02 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes Thomas Hellström
` (6 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Register a driver-wide owner list, provide a callback to identify
fast interconnects, and use the drm_pagemap_util helper to allocate
or reuse a suitable owner struct. For now we consider pagemaps on
different tiles of the same device as having a fast interconnect and
thus the same owner.
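For reference, the intended usage boils down to the following sketch.
The example_* names are placeholders for this illustration only; the
actual hookup is in the xe_svm.c hunks below.

#include <drm/drm_pagemap_util.h>

/* Placeholder: returns the struct device backing a peer. */
static struct device *example_peer_to_dev(struct drm_pagemap_peer *peer);

/* One driver-wide registry of peers and owners. */
static DRM_PAGEMAP_OWNER_LIST_DEFINE(example_owner_list);

static bool example_has_interconnect(struct drm_pagemap_peer *peer1,
                                     struct drm_pagemap_peer *peer2)
{
        /* Peers backed by the same device share a fast interconnect. */
        return example_peer_to_dev(peer1) == example_peer_to_dev(peer2);
}

static int example_join(struct drm_pagemap_peer *peer)
{
        int err;

        err = drm_pagemap_acquire_owner(peer, &example_owner_list,
                                        example_has_interconnect);
        if (err)
                return err;

        /* peer->owner is now the common hmm_range_fault() / migration owner. */
        return 0;
}

static void example_leave(struct drm_pagemap_peer *peer)
{
        drm_pagemap_release_owner(peer);
}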
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 57 ++++++++++++++++++++++++++++----
drivers/gpu/drm/xe/xe_svm.h | 24 ++++++--------
drivers/gpu/drm/xe/xe_userptr.c | 2 +-
drivers/gpu/drm/xe/xe_vm.c | 2 +-
drivers/gpu/drm/xe/xe_vm_types.h | 3 ++
5 files changed, 66 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 8aced064c83f..d27e366f8e14 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -22,8 +22,17 @@
#include "xe_vm_types.h"
#include "xe_vram_types.h"
+/* Identifies subclasses of struct drm_pagemap_peer */
+#define XE_PEER_PAGEMAP ((void *)0ul)
+#define XE_PEER_VM ((void *)1ul)
+
static int xe_svm_get_pagemaps(struct xe_vm *vm);
+void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
+{
+ return force_smem ? NULL : vm->svm.peer.owner;
+}
+
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
/*
@@ -770,6 +779,25 @@ static void xe_svm_put_pagemaps(struct xe_vm *vm)
}
}
+static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
+{
+ if (peer->private == XE_PEER_PAGEMAP)
+ return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;
+
+ return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
+}
+
+static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
+ struct drm_pagemap_peer *peer2)
+{
+ struct device *dev1 = xe_peer_to_dev(peer1);
+ struct device *dev2 = xe_peer_to_dev(peer2);
+
+ return dev1 == dev2;
+}
+
+static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
+
/**
* xe_svm_init() - SVM initialize
* @vm: The VM.
@@ -788,10 +816,18 @@ int xe_svm_init(struct xe_vm *vm)
INIT_WORK(&vm->svm.garbage_collector.work,
xe_svm_garbage_collector_work_func);
- err = xe_svm_get_pagemaps(vm);
+ vm->svm.peer.private = XE_PEER_VM;
+ err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
+ xe_has_interconnect);
if (err)
return err;
+ err = xe_svm_get_pagemaps(vm);
+ if (err) {
+ drm_pagemap_release_owner(&vm->svm.peer);
+ return err;
+ }
+
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
current->mm, 0, vm->size,
xe_modparam.svm_notifier_size * SZ_1M,
@@ -801,6 +837,7 @@ int xe_svm_init(struct xe_vm *vm)
if (err) {
xe_svm_put_pagemaps(vm);
+ drm_pagemap_release_owner(&vm->svm.peer);
return err;
}
} else {
@@ -823,6 +860,7 @@ void xe_svm_close(struct xe_vm *vm)
xe_assert(vm->xe, xe_vm_is_closed(vm));
flush_work(&vm->svm.garbage_collector.work);
xe_svm_put_pagemaps(vm);
+ drm_pagemap_release_owner(&vm->svm.peer);
}
/**
@@ -957,7 +995,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
xe_pm_runtime_get_noresume(xe);
err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
start, end, timeslice_ms,
- xe_svm_devm_owner(xe));
+ xpagemap->pagemap.owner);
if (err)
xe_svm_devmem_release(&bo->devmem_allocation);
xe_bo_unlock(bo);
@@ -1072,7 +1110,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
.devmem_only = need_vram && devmem_possible,
.timeslice_ms = need_vram && devmem_possible ?
vm->xe->atomic_svm_timeslice_ms : 0,
- .device_private_page_owner = xe_svm_devm_owner(vm->xe),
};
struct xe_validation_ctx vctx;
struct drm_exec exec;
@@ -1096,8 +1133,8 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
return err;
dpagemap = xe_vma_resolve_pagemap(vma, tile);
- if (!dpagemap && !ctx.devmem_only)
- ctx.device_private_page_owner = NULL;
+ ctx.device_private_page_owner =
+ xe_svm_private_page_owner(vm, !dpagemap && !ctx.devmem_only);
range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
if (IS_ERR(range))
@@ -1521,6 +1558,8 @@ static void xe_pagemap_destroy_work(struct work_struct *work)
pagemap->range.end - pagemap->range.start + 1);
drm_dev_exit(idx);
}
+
+ drm_pagemap_release_owner(&xpagemap->peer);
kfree(xpagemap);
}
@@ -1571,6 +1610,7 @@ static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram
dpagemap = &xpagemap->dpagemap;
INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
xpagemap->vr = vr;
+ xpagemap->peer.private = XE_PEER_PAGEMAP;
err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
if (err)
@@ -1583,11 +1623,16 @@ static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram
goto out_err;
}
+ err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
+ xe_has_interconnect);
+ if (err)
+ goto out_err;
+
pagemap->type = MEMORY_DEVICE_PRIVATE;
pagemap->range.start = res->start;
pagemap->range.end = res->end;
pagemap->nr_range = 1;
- pagemap->owner = xe_svm_devm_owner(xe);
+ pagemap->owner = xpagemap->peer.owner;
pagemap->ops = drm_pagemap_pagemap_ops_get();
addr = devm_memremap_pages(dev, pagemap);
if (IS_ERR(addr)) {
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 8a49ff17ef0c..5adce108f7eb 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,24 +6,11 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
-struct xe_device;
-
-/**
- * xe_svm_devm_owner() - Return the owner of device private memory
- * @xe: The xe device.
- *
- * Return: The owner of this device's device private memory to use in
- * hmm_range_fault()-
- */
-static inline void *xe_svm_devm_owner(struct xe_device *xe)
-{
- return xe;
-}
-
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
#include <drm/drm_gpusvm.h>
+#include <drm/drm_pagemap_util.h>
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
@@ -65,6 +52,7 @@ struct xe_svm_range {
* @pagemap: The struct dev_pagemap providing the struct pages.
* @dpagemap: The drm_pagemap managing allocation and migration.
* @destroy_work: Handles asnynchronous destruction and caching.
+ * @peer: Used for pagemap owner computation.
* @hpa_base: The host physical address base for the managemd memory.
* @vr: Backpointer to the xe_vram region.
*/
@@ -72,6 +60,7 @@ struct xe_pagemap {
struct dev_pagemap pagemap;
struct drm_pagemap dpagemap;
struct work_struct destroy_work;
+ struct drm_pagemap_peer peer;
resource_size_t hpa_base;
struct xe_vram_region *vr;
};
@@ -131,6 +120,8 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end);
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile);
+void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem);
+
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
@@ -368,6 +359,11 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
return NULL;
}
+static inline void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
+{
+ return NULL;
+}
+
static inline void xe_svm_flush(struct xe_vm *vm)
{
}
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 0d9130b1958a..e120323c43bc 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -55,7 +55,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
struct xe_device *xe = vm->xe;
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
- .device_private_page_owner = xe_svm_devm_owner(xe),
+ .device_private_page_owner = xe_svm_private_page_owner(vm, false),
.allow_mixed = true,
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 10d77666a425..1dffcd9ab61b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2886,7 +2886,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
- ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
+ ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !tile);
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 9f0d8bf1af4f..4f9a6cdb5d02 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -8,6 +8,7 @@
#include <drm/drm_gpusvm.h>
#include <drm/drm_gpuvm.h>
+#include <drm/drm_pagemap_util.h>
#include <linux/dma-resv.h>
#include <linux/kref.h>
@@ -192,6 +193,8 @@ struct xe_vm {
struct work_struct work;
} garbage_collector;
struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
+ /** @svm.peer: Used for pagemap connectivity computations. */
+ struct drm_pagemap_peer peer;
} svm;
struct xe_device *xe;
--
2.51.0
^ permalink raw reply related	[flat|nested] 49+ messages in thread
* Re: [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner
2025-10-25 12:04 ` [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
@ 2025-10-27 23:02 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-27 23:02 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:05PM +0200, Thomas Hellström wrote:
> Register a driver-wide owner list, provide a callback to identify
> fast interconnects and use the drm_pagemap_util helper to allocate
> or reuse a suitable owner struct. For now we consider pagemaps on
> different tiles on the same device as having fast interconnect and
> thus the same owner.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 57 ++++++++++++++++++++++++++++----
> drivers/gpu/drm/xe/xe_svm.h | 24 ++++++--------
> drivers/gpu/drm/xe/xe_userptr.c | 2 +-
> drivers/gpu/drm/xe/xe_vm.c | 2 +-
> drivers/gpu/drm/xe/xe_vm_types.h | 3 ++
> 5 files changed, 66 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 8aced064c83f..d27e366f8e14 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -22,8 +22,17 @@
> #include "xe_vm_types.h"
> #include "xe_vram_types.h"
>
> +/* Identifies subclasses of struct drm_pagemap_peer */
> +#define XE_PEER_PAGEMAP ((void *)0ul)
> +#define XE_PEER_VM ((void *)1ul)
> +
> static int xe_svm_get_pagemaps(struct xe_vm *vm);
>
> +void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
> +{
> + return force_smem ? NULL : vm->svm.peer.owner;
> +}
> +
> static bool xe_svm_range_in_vram(struct xe_svm_range *range)
> {
> /*
> @@ -770,6 +779,25 @@ static void xe_svm_put_pagemaps(struct xe_vm *vm)
> }
> }
>
> +static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
> +{
> + if (peer->private == XE_PEER_PAGEMAP)
> + return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;
> +
> + return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
> +}
> +
> +static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
> + struct drm_pagemap_peer *peer2)
> +{
> + struct device *dev1 = xe_peer_to_dev(peer1);
> + struct device *dev2 = xe_peer_to_dev(peer2);
> +
> + return dev1 == dev2;
> +}
> +
> +static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
> +
> /**
> * xe_svm_init() - SVM initialize
> * @vm: The VM.
> @@ -788,10 +816,18 @@ int xe_svm_init(struct xe_vm *vm)
> INIT_WORK(&vm->svm.garbage_collector.work,
> xe_svm_garbage_collector_work_func);
>
> - err = xe_svm_get_pagemaps(vm);
> + vm->svm.peer.private = XE_PEER_VM;
> + err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
> + xe_has_interconnect);
> if (err)
> return err;
>
> + err = xe_svm_get_pagemaps(vm);
> + if (err) {
> + drm_pagemap_release_owner(&vm->svm.peer);
> + return err;
> + }
> +
> err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
> current->mm, 0, vm->size,
> xe_modparam.svm_notifier_size * SZ_1M,
> @@ -801,6 +837,7 @@ int xe_svm_init(struct xe_vm *vm)
>
> if (err) {
> xe_svm_put_pagemaps(vm);
> + drm_pagemap_release_owner(&vm->svm.peer);
> return err;
> }
> } else {
> @@ -823,6 +860,7 @@ void xe_svm_close(struct xe_vm *vm)
> xe_assert(vm->xe, xe_vm_is_closed(vm));
> flush_work(&vm->svm.garbage_collector.work);
> xe_svm_put_pagemaps(vm);
> + drm_pagemap_release_owner(&vm->svm.peer);
> }
>
> /**
> @@ -957,7 +995,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> xe_pm_runtime_get_noresume(xe);
> err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
> start, end, timeslice_ms,
> - xe_svm_devm_owner(xe));
> + xpagemap->pagemap.owner);
> if (err)
> xe_svm_devmem_release(&bo->devmem_allocation);
> xe_bo_unlock(bo);
> @@ -1072,7 +1110,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> .devmem_only = need_vram && devmem_possible,
> .timeslice_ms = need_vram && devmem_possible ?
> vm->xe->atomic_svm_timeslice_ms : 0,
> - .device_private_page_owner = xe_svm_devm_owner(vm->xe),
> };
> struct xe_validation_ctx vctx;
> struct drm_exec exec;
> @@ -1096,8 +1133,8 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> return err;
>
> dpagemap = xe_vma_resolve_pagemap(vma, tile);
> - if (!dpagemap && !ctx.devmem_only)
> - ctx.device_private_page_owner = NULL;
> + ctx.device_private_page_owner =
> + xe_svm_private_page_owner(vm, !dpagemap && !ctx.devmem_only);
> range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
>
> if (IS_ERR(range))
> @@ -1521,6 +1558,8 @@ static void xe_pagemap_destroy_work(struct work_struct *work)
> pagemap->range.end - pagemap->range.start + 1);
> drm_dev_exit(idx);
> }
> +
> + drm_pagemap_release_owner(&xpagemap->peer);
> kfree(xpagemap);
> }
>
> @@ -1571,6 +1610,7 @@ static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram
> dpagemap = &xpagemap->dpagemap;
> INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
> xpagemap->vr = vr;
> + xpagemap->peer.private = XE_PEER_PAGEMAP;
>
> err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
> if (err)
> @@ -1583,11 +1623,16 @@ static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram
> goto out_err;
> }
>
> + err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
> + xe_has_interconnect);
> + if (err)
> + goto out_err;
Not a complete review, but I noticed this one when looking at the code
with the entire series applied - the error path here doesn't call
devm_release_mem_region, whereas the devm_memremap_pages failure path
does call it. Looks suspicious.
Also, if devm_memremap_pages fails, should drm_pagemap_release_owner be
called? I haven't looked at the series closely enough to know, so just
asking for now.
Matt
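For illustration, one possible unwind ordering; the labels are
hypothetical and the surrounding code in xe_pagemap_create() may
differ:

        err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
                                        xe_has_interconnect);
        if (err)
                goto out_release_region;        /* hypothetical label */

        addr = devm_memremap_pages(dev, pagemap);
        if (IS_ERR(addr)) {
                err = PTR_ERR(addr);
                goto out_release_owner;         /* hypothetical label */
        }

        /* ... */

out_release_owner:
        drm_pagemap_release_owner(&xpagemap->peer);
out_release_region:
        devm_release_mem_region(dev, res->start, resource_size(res));
        return ERR_PTR(err);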
> +
> pagemap->type = MEMORY_DEVICE_PRIVATE;
> pagemap->range.start = res->start;
> pagemap->range.end = res->end;
> pagemap->nr_range = 1;
> - pagemap->owner = xe_svm_devm_owner(xe);
> + pagemap->owner = xpagemap->peer.owner;
> pagemap->ops = drm_pagemap_pagemap_ops_get();
> addr = devm_memremap_pages(dev, pagemap);
> if (IS_ERR(addr)) {
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index 8a49ff17ef0c..5adce108f7eb 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -6,24 +6,11 @@
> #ifndef _XE_SVM_H_
> #define _XE_SVM_H_
>
> -struct xe_device;
> -
> -/**
> - * xe_svm_devm_owner() - Return the owner of device private memory
> - * @xe: The xe device.
> - *
> - * Return: The owner of this device's device private memory to use in
> - * hmm_range_fault()-
> - */
> -static inline void *xe_svm_devm_owner(struct xe_device *xe)
> -{
> - return xe;
> -}
> -
> #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
>
> #include <drm/drm_pagemap.h>
> #include <drm/drm_gpusvm.h>
> +#include <drm/drm_pagemap_util.h>
>
> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
>
> @@ -65,6 +52,7 @@ struct xe_svm_range {
> * @pagemap: The struct dev_pagemap providing the struct pages.
> * @dpagemap: The drm_pagemap managing allocation and migration.
> * @destroy_work: Handles asnynchronous destruction and caching.
> + * @peer: Used for pagemap owner computation.
> * @hpa_base: The host physical address base for the managemd memory.
> * @vr: Backpointer to the xe_vram region.
> */
> @@ -72,6 +60,7 @@ struct xe_pagemap {
> struct dev_pagemap pagemap;
> struct drm_pagemap dpagemap;
> struct work_struct destroy_work;
> + struct drm_pagemap_peer peer;
> resource_size_t hpa_base;
> struct xe_vram_region *vr;
> };
> @@ -131,6 +120,8 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end);
>
> struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile);
>
> +void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem);
> +
> /**
> * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
> * @range: SVM range
> @@ -368,6 +359,11 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> return NULL;
> }
>
> +static inline void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
> +{
> + return NULL;
> +}
> +
> static inline void xe_svm_flush(struct xe_vm *vm)
> {
> }
> diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
> index 0d9130b1958a..e120323c43bc 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.c
> +++ b/drivers/gpu/drm/xe/xe_userptr.c
> @@ -55,7 +55,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
> struct xe_device *xe = vm->xe;
> struct drm_gpusvm_ctx ctx = {
> .read_only = xe_vma_read_only(vma),
> - .device_private_page_owner = xe_svm_devm_owner(xe),
> + .device_private_page_owner = xe_svm_private_page_owner(vm, false),
> .allow_mixed = true,
> };
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 10d77666a425..1dffcd9ab61b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2886,7 +2886,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> ctx.read_only = xe_vma_read_only(vma);
> ctx.devmem_possible = devmem_possible;
> ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> - ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
> + ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !tile);
>
> /* TODO: Threading the migration */
> xa_for_each(&op->prefetch_range.range, i, svm_range) {
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 9f0d8bf1af4f..4f9a6cdb5d02 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -8,6 +8,7 @@
>
> #include <drm/drm_gpusvm.h>
> #include <drm/drm_gpuvm.h>
> +#include <drm/drm_pagemap_util.h>
>
> #include <linux/dma-resv.h>
> #include <linux/kref.h>
> @@ -192,6 +193,8 @@ struct xe_vm {
> struct work_struct work;
> } garbage_collector;
> struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
> + /** @svm.peer: Used for pagemap connectivity computations. */
> + struct drm_pagemap_peer peer;
> } svm;
>
> struct xe_device *xe;
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (7 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-28 0:35 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attibute drm_pagemap to select where to migrate Thomas Hellström
` (5 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Pass a refcounted drm_pagemap pointer around with the memory advise
attributes. As a consequence, struct xe_vma_mem_attr can't simply be
assigned or freed without taking the reference counts of individual
members into account. Also add helpers to do that.
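A short sketch of the resulting calling convention (illustrative; the
actual call sites are in the xe_vm.c hunk below):

        struct xe_vma_mem_attr tmp = {};        /* zero-init: holds no reference yet */

        /*
         * A plain "tmp = vma->attr;" would duplicate the
         * preferred_loc.dpagemap pointer without taking a reference.
         * The helper puts tmp's old reference (none here) and gets a
         * new one for the copied pointer.
         */
        xe_vma_mem_attr_copy(&tmp, &vma->attr);

        /* Drop the reference when the copy is no longer needed. */
        xe_vma_mem_attr_fini(&tmp);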
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 2 +-
drivers/gpu/drm/xe/xe_vm.c | 36 +++++++++++++++++++++++++-----
drivers/gpu/drm/xe/xe_vm.h | 1 +
drivers/gpu/drm/xe/xe_vm_madvise.c | 1 +
drivers/gpu/drm/xe/xe_vm_types.h | 9 ++++++++
5 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index d27e366f8e14..d27cedeaf70c 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -330,7 +330,7 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64
if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
default_attr.pat_index = vma->attr.default_pat_index;
default_attr.default_pat_index = vma->attr.default_pat_index;
- vma->attr = default_attr;
+ xe_vma_mem_attr_copy(&vma->attr, &default_attr);
} else {
vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
range_start, range_end);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1dffcd9ab61b..3c3dc1b1ace9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -964,6 +964,27 @@ static void xe_vma_free(struct xe_vma *vma)
kfree(vma);
}
+static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
+{
+ drm_pagemap_put(attr->preferred_loc.dpagemap);
+}
+
+/**
+ * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
+ * @to: Destination.
+ * @from: Source.
+ *
+ * Copies an xe_vma_mem_attr structure taking care to get reference
+ * counting of individual members right.
+ */
+void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
+{
+ xe_vma_mem_attr_fini(to);
+ *to = *from;
+ if (to->preferred_loc.dpagemap)
+ drm_pagemap_get(to->preferred_loc.dpagemap);
+}
+
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
struct xe_bo *bo,
u64 bo_offset_or_userptr,
@@ -1014,8 +1035,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
- vma->attr = *attr;
-
+ xe_vma_mem_attr_copy(&vma->attr, attr);
if (bo) {
struct drm_gpuvm_bo *vm_bo;
@@ -1023,6 +1043,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
if (IS_ERR(vm_bo)) {
+ xe_vma_mem_attr_fini(&vma->attr);
xe_vma_free(vma);
return ERR_CAST(vm_bo);
}
@@ -1042,6 +1063,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
if (err) {
+ xe_vma_mem_attr_fini(&vma->attr);
xe_vma_free(vma);
return ERR_PTR(err);
}
@@ -1057,6 +1079,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
+ xe_vma_mem_attr_fini(&vma->attr);
+
if (vma->ufence) {
xe_sync_ufence_put(vma->ufence);
vma->ufence = NULL;
@@ -4221,7 +4245,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
struct drm_gpuva_op *__op;
unsigned int vma_flags = 0;
bool remap_op = false;
- struct xe_vma_mem_attr tmp_attr;
+ struct xe_vma_mem_attr tmp_attr = {};
u16 default_pat;
int err;
@@ -4314,7 +4338,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
* VMA, so they can be assigned to newly MAP created vma.
*/
if (is_madvise)
- tmp_attr = vma->attr;
+ xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
} else if (__op->op == DRM_GPUVA_OP_MAP) {
@@ -4324,12 +4348,13 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
* copy them to new vma.
*/
if (is_madvise)
- vma->attr = tmp_attr;
+ xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
}
}
xe_vm_unlock(vm);
drm_gpuva_ops_free(&vm->gpuvm, ops);
+ xe_vma_mem_attr_fini(&tmp_attr);
return 0;
unwind_ops:
@@ -4387,3 +4412,4 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r
return xe_vm_alloc_vma(vm, &map_req, false);
}
+
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index ef8a5019574e..d328d31afe8e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -411,4 +411,5 @@ static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
+void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from);
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index cad3cf627c3f..9553008409d1 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -95,6 +95,7 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
*/
vmas[i]->attr.preferred_loc.migration_policy =
op->preferred_mem_loc.migration_policy;
+ vmas[i]->attr.preferred_loc.dpagemap = NULL;
}
}
}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 4f9a6cdb5d02..70856d536047 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -20,6 +20,8 @@
#include "xe_range_fence.h"
#include "xe_userptr.h"
+struct drm_pagemap;
+
struct xe_bo;
struct xe_svm_range;
struct xe_sync_entry;
@@ -65,6 +67,13 @@ struct xe_vma_mem_attr {
* closest device memory respectively.
*/
u32 devmem_fd;
+ /**
+ * @preferred_loc.dpagemap: Reference-counted pointer to the drm_pagemap preferred
+ * for migration on a SVM page-fault. The pointer is protected by the
+ * vm lock, and is %NULL if @devmem_fd should be consulted for special
+ * values.
+ */
+ struct drm_pagemap *dpagemap;
} preferred_loc;
/**
--
2.51.0
^ permalink raw reply related	[flat|nested] 49+ messages in thread
* Re: [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes
2025-10-25 12:04 ` [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes Thomas Hellström
@ 2025-10-28 0:35 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 0:35 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:06PM +0200, Thomas Hellström wrote:
> As a consequence, struct xe_vma_mem_attr() can't simply be assigned
> or freed without taking the reference count of individual members
> into account. Also add helpers to do that.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 2 +-
> drivers/gpu/drm/xe/xe_vm.c | 36 +++++++++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_vm.h | 1 +
> drivers/gpu/drm/xe/xe_vm_madvise.c | 1 +
> drivers/gpu/drm/xe/xe_vm_types.h | 9 ++++++++
> 5 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index d27e366f8e14..d27cedeaf70c 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -330,7 +330,7 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64
> if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
> default_attr.pat_index = vma->attr.default_pat_index;
> default_attr.default_pat_index = vma->attr.default_pat_index;
> - vma->attr = default_attr;
> + xe_vma_mem_attr_copy(&vma->attr, &default_attr);
> } else {
> vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
> range_start, range_end);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 1dffcd9ab61b..3c3dc1b1ace9 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -964,6 +964,27 @@ static void xe_vma_free(struct xe_vma *vma)
> kfree(vma);
> }
>
> +static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
> +{
> + drm_pagemap_put(attr->preferred_loc.dpagemap);
> +}
> +
> +/**
> + * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
> + * @to: Destination.
> + * @from: Source.
> + *
> + * Copies an xe_vma_mem_attr structure taking care to get reference
> + * counting of individual members right.
> + */
> +void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
> +{
> + xe_vma_mem_attr_fini(to);
> + *to = *from;
> + if (to->preferred_loc.dpagemap)
> + drm_pagemap_get(to->preferred_loc.dpagemap);
> +}
> +
> static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> struct xe_bo *bo,
> u64 bo_offset_or_userptr,
> @@ -1014,8 +1035,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> if (vm->xe->info.has_atomic_enable_pte_bit)
> vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>
> - vma->attr = *attr;
> -
> + xe_vma_mem_attr_copy(&vma->attr, attr);
> if (bo) {
> struct drm_gpuvm_bo *vm_bo;
>
> @@ -1023,6 +1043,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>
> vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
> if (IS_ERR(vm_bo)) {
> + xe_vma_mem_attr_fini(&vma->attr);
> xe_vma_free(vma);
> return ERR_CAST(vm_bo);
> }
> @@ -1042,6 +1063,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>
> err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
> if (err) {
> + xe_vma_mem_attr_fini(&vma->attr);
> xe_vma_free(vma);
> return ERR_PTR(err);
> }
> @@ -1057,6 +1079,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
> {
> struct xe_vm *vm = xe_vma_vm(vma);
>
> + xe_vma_mem_attr_fini(&vma->attr);
> +
Would it be cleaner to move xe_vma_mem_attr_fini to xe_vma_free?
Matt
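I.e. a sketch of that alternative (illustrative only, not what the
patch does; the body of xe_vma_free() is abbreviated):

static void xe_vma_free(struct xe_vma *vma)
{
        xe_vma_mem_attr_fini(&vma->attr);       /* single place to drop the dpagemap ref */

        /* ... existing teardown ... */
        kfree(vma);
}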
> if (vma->ufence) {
> xe_sync_ufence_put(vma->ufence);
> vma->ufence = NULL;
> @@ -4221,7 +4245,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
> struct drm_gpuva_op *__op;
> unsigned int vma_flags = 0;
> bool remap_op = false;
> - struct xe_vma_mem_attr tmp_attr;
> + struct xe_vma_mem_attr tmp_attr = {};
> u16 default_pat;
> int err;
>
> @@ -4314,7 +4338,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
> * VMA, so they can be assigned to newly MAP created vma.
> */
> if (is_madvise)
> - tmp_attr = vma->attr;
> + xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
>
> xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
> } else if (__op->op == DRM_GPUVA_OP_MAP) {
> @@ -4324,12 +4348,13 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
> * copy them to new vma.
> */
> if (is_madvise)
> - vma->attr = tmp_attr;
> + xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
> }
> }
>
> xe_vm_unlock(vm);
> drm_gpuva_ops_free(&vm->gpuvm, ops);
> + xe_vma_mem_attr_fini(&tmp_attr);
> return 0;
>
> unwind_ops:
> @@ -4387,3 +4412,4 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r
>
> return xe_vm_alloc_vma(vm, &map_req, false);
> }
> +
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index ef8a5019574e..d328d31afe8e 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -411,4 +411,5 @@ static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
> #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
> ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
>
> +void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from);
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index cad3cf627c3f..9553008409d1 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -95,6 +95,7 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
> */
> vmas[i]->attr.preferred_loc.migration_policy =
> op->preferred_mem_loc.migration_policy;
> + vmas[i]->attr.preferred_loc.dpagemap = NULL;
> }
> }
> }
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 4f9a6cdb5d02..70856d536047 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -20,6 +20,8 @@
> #include "xe_range_fence.h"
> #include "xe_userptr.h"
>
> +struct drm_pagemap;
> +
> struct xe_bo;
> struct xe_svm_range;
> struct xe_sync_entry;
> @@ -65,6 +67,13 @@ struct xe_vma_mem_attr {
> * closest device memory respectively.
> */
> u32 devmem_fd;
> + /**
> + * @preferred_loc.dpagemap: Reference-counted pointer to the drm_pagemap preferred
> + * for migration on a SVM page-fault. The pointer is protected by the
> + * vm lock, and is %NULL if @devmem_fd should be consulted for special
> + * values.
> + */
> + struct drm_pagemap *dpagemap;
> } preferred_loc;
>
> /**
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 10/15] drm/xe: Use the vma attibute drm_pagemap to select where to migrate
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (8 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-25 18:01 ` kernel test robot
` (2 more replies)
2025-10-25 12:04 ` [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc() Thomas Hellström
` (4 subsequent siblings)
14 siblings, 3 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Enable migrating to foreign drm_pagemaps.
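In short, the fault and prefetch paths now select and pass the
destination pagemap explicitly (sketch of the hunks below):

        struct drm_pagemap *dpagemap;

        /*
         * Devmem-only faults still use the tile-local pagemap; otherwise
         * the vma's madvise attributes decide, and may resolve to another
         * device's pagemap.
         */
        dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
                xe_vma_resolve_pagemap(vma, tile);

        /* The destination is no longer derived from a tile internally. */
        err = xe_svm_alloc_vram(range, &ctx, dpagemap);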
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 39 +++++++++++++++-----------------
drivers/gpu/drm/xe/xe_svm.h | 8 +++----
drivers/gpu/drm/xe/xe_vm.c | 19 ++++++----------
drivers/gpu/drm/xe/xe_vm_types.h | 6 ++---
4 files changed, 32 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index d27cedeaf70c..36a6ac293e71 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1132,9 +1132,9 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
if (err)
return err;
- dpagemap = xe_vma_resolve_pagemap(vma, tile);
- ctx.device_private_page_owner =
- xe_svm_private_page_owner(vm, !dpagemap && !ctx.devmem_only);
+ dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
+ xe_vma_resolve_pagemap(vma, tile);
+ ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
if (IS_ERR(range))
@@ -1159,13 +1159,8 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
ktime_t migrate_start = xe_svm_stats_ktime_get();
- /* TODO : For multi-device dpagemap will be used to find the
- * remote tile and remote device. Will need to modify
- * xe_svm_alloc_vram to use dpagemap for future multi-device
- * support.
- */
xe_svm_range_migrate_count_stats_incr(gt, range);
- err = xe_svm_alloc_vram(tile, range, &ctx);
+ err = xe_svm_alloc_vram(range, &ctx, dpagemap);
xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
@@ -1482,7 +1477,13 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
*/
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
- s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
+ struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap;
+ s32 fd;
+
+ if (dpagemap)
+ return dpagemap;
+
+ fd = (s32)vma->attr.preferred_loc.devmem_fd;
if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
return NULL;
@@ -1490,28 +1491,24 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
- /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
return NULL;
}
/**
* xe_svm_alloc_vram()- Allocate device memory pages for range,
* migrating existing data.
- * @tile: tile to allocate vram from
* @range: SVM range
* @ctx: DRM GPU SVM context
+ * @dpagemap: The struct drm_pagemap representing the memory to allocate.
*
* Return: 0 on success, error code on failure.
*/
-int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
{
- struct drm_pagemap *dpagemap;
-
- xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
+ xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
- dpagemap = xe_tile_local_pagemap(tile);
return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
xe_svm_range_end(range),
range->base.gpusvm->mm,
@@ -1778,9 +1775,9 @@ int xe_pagemap_cache_create(struct xe_tile *tile)
return 0;
}
-int xe_svm_alloc_vram(struct xe_tile *tile,
- struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+int xe_svm_alloc_vram(struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 5adce108f7eb..c7027facf6e9 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -94,8 +94,8 @@ int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
-int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx);
+int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap);
struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
struct xe_vma *vma, struct drm_gpusvm_ctx *ctx);
@@ -276,8 +276,8 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
}
static inline int
-xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3c3dc1b1ace9..381d4b4abac9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2355,18 +2355,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
dpagemap = xe_vma_resolve_pagemap(vma,
xe_device_get_root_tile(vm->xe));
- /*
- * TODO: Once multigpu support is enabled will need
- * something to dereference tile from dpagemap.
- */
- if (dpagemap)
- tile = xe_device_get_root_tile(vm->xe);
} else if (prefetch_region) {
tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
XE_PL_VRAM0];
+ dpagemap = xe_tile_local_pagemap(tile);
}
- op->prefetch_range.tile = tile;
+ op->prefetch_range.dpagemap = dpagemap;
alloc_next_range:
svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
@@ -2897,7 +2892,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
{
bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
- struct xe_tile *tile = op->prefetch_range.tile;
+ struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
int err = 0;
struct xe_svm_range *svm_range;
@@ -2910,15 +2905,15 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
- ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !tile);
+ ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
- if (!tile)
+ if (!dpagemap)
xe_svm_range_migrate_to_smem(vm, svm_range);
- if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
- err = xe_svm_alloc_vram(tile, svm_range, &ctx);
+ if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!dpagemap)) {
+ err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
if (err) {
drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 70856d536047..5313bf2afa54 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -413,10 +413,10 @@ struct xe_vma_op_prefetch_range {
/** @ranges_count: number of svm ranges to map */
u32 ranges_count;
/**
- * @tile: Pointer to the tile structure containing memory to prefetch.
- * NULL if prefetch requested region is smem
+ * @dpagemap: Pointer to the dpagemap structure containing memory to prefetch.
+ * NULL if prefetch requested region is smem
*/
- struct xe_tile *tile;
+ struct drm_pagemap *dpagemap;
};
/** enum xe_vma_op_flags - flags for VMA operation */
--
2.51.0
* Re: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate Thomas Hellström
@ 2025-10-25 18:01 ` kernel test robot
2025-10-29 3:27 ` Matthew Brost
2025-10-29 16:59 ` kernel test robot
2 siblings, 0 replies; 49+ messages in thread
From: kernel test robot @ 2025-10-25 18:01 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: oe-kbuild-all, Thomas Hellström, dri-devel,
himal.prasad.ghimiray, apopple, airlied, Simona Vetter,
felix.kuehling, Matthew Brost, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
Hi Thomas,
kernel test robot noticed the following build warnings:
[auto build test WARNING on drm-xe/drm-xe-next]
[also build test WARNING on next-20251024]
[cannot apply to linus/master v6.18-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-pagemap-drm-xe-Add-refcounting-to-struct-drm_pagemap/20251025-200645
base: https://gitlab.freedesktop.org/drm/xe/kernel.git drm-xe-next
patch link: https://lore.kernel.org/r/20251025120412.12262-11-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
config: x86_64-allmodconfig (https://download.01.org/0day-ci/archive/20251026/202510260048.zLL2A4Mi-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251026/202510260048.zLL2A4Mi-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510260048.zLL2A4Mi-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/gpu/drm/xe/xe_vm.c:2358:15: warning: variable 'dpagemap' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
2358 | } else if (prefetch_region) {
| ^~~~~~~~~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2364:34: note: uninitialized use occurs here
2364 | op->prefetch_range.dpagemap = dpagemap;
| ^~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2358:11: note: remove the 'if' if its condition is always true
2358 | } else if (prefetch_region) {
| ^~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2335:32: note: initialize the variable 'dpagemap' to silence this warning
2335 | struct drm_pagemap *dpagemap;
| ^
| = NULL
1 warning generated.
vim +2358 drivers/gpu/drm/xe/xe_vm.c
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2242
b06d47be7c8316 Matthew Brost 2023-07-07 2243 /*
b06d47be7c8316 Matthew Brost 2023-07-07 2244 * Create operations list from IOCTL arguments, setup operations fields so parse
b06d47be7c8316 Matthew Brost 2023-07-07 2245 * and commit steps are decoupled from IOCTL arguments. This step can fail.
b06d47be7c8316 Matthew Brost 2023-07-07 2246 */
b06d47be7c8316 Matthew Brost 2023-07-07 2247 static struct drm_gpuva_ops *
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2248 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2249 struct xe_bo *bo, u64 bo_offset_or_userptr,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2250 u64 addr, u64 range,
cad4a0d6af146e Rodrigo Vivi 2023-11-22 2251 u32 operation, u32 flags,
e1fbc4f18d5b44 Matthew Auld 2023-09-25 2252 u32 prefetch_region, u16 pat_index)
dd08ebf6c3525a Matthew Brost 2023-03-30 2253 {
b06d47be7c8316 Matthew Brost 2023-07-07 2254 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
b06d47be7c8316 Matthew Brost 2023-07-07 2255 struct drm_gpuva_ops *ops;
b06d47be7c8316 Matthew Brost 2023-07-07 2256 struct drm_gpuva_op *__op;
b06d47be7c8316 Matthew Brost 2023-07-07 2257 struct drm_gpuvm_bo *vm_bo;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2258 u64 range_end = addr + range;
dd08ebf6c3525a Matthew Brost 2023-03-30 2259 int err;
dd08ebf6c3525a Matthew Brost 2023-03-30 2260
b06d47be7c8316 Matthew Brost 2023-07-07 2261 lockdep_assert_held_write(&vm->lock);
dd08ebf6c3525a Matthew Brost 2023-03-30 2262
b06d47be7c8316 Matthew Brost 2023-07-07 2263 vm_dbg(&vm->xe->drm,
b06d47be7c8316 Matthew Brost 2023-07-07 2264 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
78ddc872c6a91d Francois Dugast 2023-09-20 2265 operation, (ULL)addr, (ULL)range,
b06d47be7c8316 Matthew Brost 2023-07-07 2266 (ULL)bo_offset_or_userptr);
dd08ebf6c3525a Matthew Brost 2023-03-30 2267
78ddc872c6a91d Francois Dugast 2023-09-20 2268 switch (operation) {
d5dc73dbd148ef Francois Dugast 2023-11-14 2269 case DRM_XE_VM_BIND_OP_MAP:
000a45dce7adc1 Boris Brezillon 2025-08-19 2270 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
000a45dce7adc1 Boris Brezillon 2025-08-19 2271 struct drm_gpuvm_map_req map_req = {
000a45dce7adc1 Boris Brezillon 2025-08-19 2272 .map.va.addr = addr,
000a45dce7adc1 Boris Brezillon 2025-08-19 2273 .map.va.range = range,
000a45dce7adc1 Boris Brezillon 2025-08-19 2274 .map.gem.obj = obj,
000a45dce7adc1 Boris Brezillon 2025-08-19 2275 .map.gem.offset = bo_offset_or_userptr,
000a45dce7adc1 Boris Brezillon 2025-08-19 2276 };
000a45dce7adc1 Boris Brezillon 2025-08-19 2277
000a45dce7adc1 Boris Brezillon 2025-08-19 2278 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
b06d47be7c8316 Matthew Brost 2023-07-07 2279 break;
000a45dce7adc1 Boris Brezillon 2025-08-19 2280 }
d5dc73dbd148ef Francois Dugast 2023-11-14 2281 case DRM_XE_VM_BIND_OP_UNMAP:
b06d47be7c8316 Matthew Brost 2023-07-07 2282 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
b06d47be7c8316 Matthew Brost 2023-07-07 2283 break;
d5dc73dbd148ef Francois Dugast 2023-11-14 2284 case DRM_XE_VM_BIND_OP_PREFETCH:
b06d47be7c8316 Matthew Brost 2023-07-07 2285 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
b06d47be7c8316 Matthew Brost 2023-07-07 2286 break;
d5dc73dbd148ef Francois Dugast 2023-11-14 2287 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
c73acc1eeba5e3 Francois Dugast 2023-09-12 2288 xe_assert(vm->xe, bo);
dd08ebf6c3525a Matthew Brost 2023-03-30 2289
08a4f00e62bc96 Thomas Hellström 2023-09-08 2290 err = xe_bo_lock(bo, true);
b06d47be7c8316 Matthew Brost 2023-07-07 2291 if (err)
b06d47be7c8316 Matthew Brost 2023-07-07 2292 return ERR_PTR(err);
b06d47be7c8316 Matthew Brost 2023-07-07 2293
9d0c1c5618be02 Thomas Hellström 2023-12-22 2294 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2295 if (IS_ERR(vm_bo)) {
9d0c1c5618be02 Thomas Hellström 2023-12-22 2296 xe_bo_unlock(bo);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2297 return ERR_CAST(vm_bo);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2298 }
dd08ebf6c3525a Matthew Brost 2023-03-30 2299
b06d47be7c8316 Matthew Brost 2023-07-07 2300 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
b06d47be7c8316 Matthew Brost 2023-07-07 2301 drm_gpuvm_bo_put(vm_bo);
08a4f00e62bc96 Thomas Hellström 2023-09-08 2302 xe_bo_unlock(bo);
b06d47be7c8316 Matthew Brost 2023-07-07 2303 break;
b06d47be7c8316 Matthew Brost 2023-07-07 2304 default:
5c0553cdc811bb Francois Dugast 2023-09-12 2305 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
b06d47be7c8316 Matthew Brost 2023-07-07 2306 ops = ERR_PTR(-EINVAL);
dd08ebf6c3525a Matthew Brost 2023-03-30 2307 }
40709aa761acbc Matthew Brost 2023-11-20 2308 if (IS_ERR(ops))
40709aa761acbc Matthew Brost 2023-11-20 2309 return ops;
dd08ebf6c3525a Matthew Brost 2023-03-30 2310
40709aa761acbc Matthew Brost 2023-11-20 2311 drm_gpuva_for_each_op(__op, ops) {
40709aa761acbc Matthew Brost 2023-11-20 2312 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
40709aa761acbc Matthew Brost 2023-11-20 2313
40709aa761acbc Matthew Brost 2023-11-20 2314 if (__op->op == DRM_GPUVA_OP_MAP) {
06e7139a034f26 Thomas Hellström 2024-04-23 2315 op->map.immediate =
06e7139a034f26 Thomas Hellström 2024-04-23 2316 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2317 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2318 op->map.vma_flags |= XE_VMA_READ_ONLY;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2319 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2320 op->map.vma_flags |= DRM_GPUVA_SPARSE;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2321 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2322 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2323 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2324 op->map.vma_flags |= XE_VMA_DUMPABLE;
59a2d3f38ab23c Thomas Hellström 2025-10-15 2325 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
59a2d3f38ab23c Thomas Hellström 2025-10-15 2326 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
e1fbc4f18d5b44 Matthew Auld 2023-09-25 2327 op->map.pat_index = pat_index;
5b658b7e89c312 Oak Zeng 2025-04-03 2328 op->map.invalidate_on_bind =
5b658b7e89c312 Oak Zeng 2025-04-03 2329 __xe_vm_needs_clear_scratch_pages(vm, flags);
40709aa761acbc Matthew Brost 2023-11-20 2330 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2331 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2332 struct xe_tile *tile;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2333 struct xe_svm_range *svm_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2334 struct drm_gpusvm_ctx ctx = {};
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2335 struct drm_pagemap *dpagemap;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2336 u8 id, tile_mask = 0;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2337 u32 i;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2338
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2339 if (!xe_vma_is_cpu_addr_mirror(vma)) {
40709aa761acbc Matthew Brost 2023-11-20 2340 op->prefetch.region = prefetch_region;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2341 break;
40709aa761acbc Matthew Brost 2023-11-20 2342 }
40709aa761acbc Matthew Brost 2023-11-20 2343
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2344 ctx.read_only = xe_vma_read_only(vma);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2345 ctx.devmem_possible = IS_DGFX(vm->xe) &&
4a1eaf7d110aa5 Matthew Brost 2025-07-10 2346 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2347
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2348 for_each_tile(tile, vm->xe, id)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2349 tile_mask |= 0x1 << id;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2350
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2351 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2352 op->prefetch_range.ranges_count = 0;
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2353 tile = NULL;
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2354
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2355 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2356 dpagemap = xe_vma_resolve_pagemap(vma,
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2357 xe_device_get_root_tile(vm->xe));
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 @2358 } else if (prefetch_region) {
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2359 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2360 XE_PL_VRAM0];
1b7e4275a5db37 Thomas Hellström 2025-10-25 2361 dpagemap = xe_tile_local_pagemap(tile);
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2362 }
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2363
1b7e4275a5db37 Thomas Hellström 2025-10-25 2364 op->prefetch_range.dpagemap = dpagemap;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2365 alloc_next_range:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2366 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2367
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2368 if (PTR_ERR(svm_range) == -ENOENT) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2369 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2370
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2371 addr = ret == ULONG_MAX ? 0 : ret;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2372 if (addr)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2373 goto alloc_next_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2374 else
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2375 goto print_op_label;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2376 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2377
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2378 if (IS_ERR(svm_range)) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2379 err = PTR_ERR(svm_range);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2380 goto unwind_prefetch_ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2381 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2382
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2383 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2384 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2385 goto check_next_range;
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2386 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2387
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2388 err = xa_alloc(&op->prefetch_range.range,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2389 &i, svm_range, xa_limit_32b,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2390 GFP_KERNEL);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2391
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2392 if (err)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2393 goto unwind_prefetch_ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2394
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2395 op->prefetch_range.ranges_count++;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2396 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2397 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2398 check_next_range:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2399 if (range_end > xe_svm_range_end(svm_range) &&
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2400 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2401 addr = xe_svm_range_end(svm_range);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2402 goto alloc_next_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2403 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2404 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2405 print_op_label:
b06d47be7c8316 Matthew Brost 2023-07-07 2406 print_op(vm->xe, __op);
40709aa761acbc Matthew Brost 2023-11-20 2407 }
b06d47be7c8316 Matthew Brost 2023-07-07 2408
b06d47be7c8316 Matthew Brost 2023-07-07 2409 return ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2410
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2411 unwind_prefetch_ops:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2412 xe_svm_prefetch_gpuva_ops_fini(ops);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2413 drm_gpuva_ops_free(&vm->gpuvm, ops);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2414 return ERR_PTR(err);
dd08ebf6c3525a Matthew Brost 2023-03-30 2415 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2416
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
* Re: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate Thomas Hellström
2025-10-25 18:01 ` kernel test robot
@ 2025-10-29 3:27 ` Matthew Brost
2025-10-29 14:56 ` Thomas Hellström
2025-10-29 16:59 ` kernel test robot
2 siblings, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 3:27 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:07PM +0200, Thomas Hellström wrote:
> Enable migrating to foreign drm_pagemaps.
>
This could use a slightly better commit message ahead of merging.
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 39 +++++++++++++++-----------------
> drivers/gpu/drm/xe/xe_svm.h | 8 +++----
> drivers/gpu/drm/xe/xe_vm.c | 19 ++++++----------
> drivers/gpu/drm/xe/xe_vm_types.h | 6 ++---
> 4 files changed, 32 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index d27cedeaf70c..36a6ac293e71 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -1132,9 +1132,9 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> if (err)
> return err;
>
> - dpagemap = xe_vma_resolve_pagemap(vma, tile);
> - ctx.device_private_page_owner =
> - xe_svm_private_page_owner(vm, !dpagemap && !ctx.devmem_only);
> + dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
> + xe_vma_resolve_pagemap(vma, tile);
> + ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
> range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
>
> if (IS_ERR(range))
> @@ -1159,13 +1159,8 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
> ktime_t migrate_start = xe_svm_stats_ktime_get();
>
> - /* TODO : For multi-device dpagemap will be used to find the
> - * remote tile and remote device. Will need to modify
> - * xe_svm_alloc_vram to use dpagemap for future multi-device
> - * support.
> - */
> xe_svm_range_migrate_count_stats_incr(gt, range);
> - err = xe_svm_alloc_vram(tile, range, &ctx);
> + err = xe_svm_alloc_vram(range, &ctx, dpagemap);
> xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
> ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
> if (err) {
> @@ -1482,7 +1477,13 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
> */
> struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
> {
> - s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
> + struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap;
> + s32 fd;
> +
> + if (dpagemap)
> + return dpagemap;
> +
> + fd = (s32)vma->attr.preferred_loc.devmem_fd;
>
> if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
> return NULL;
> @@ -1490,28 +1491,24 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
> return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
>
> - /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
> return NULL;
> }
>
> /**
> * xe_svm_alloc_vram()- Allocate device memory pages for range,
> * migrating existing data.
> - * @tile: tile to allocate vram from
> * @range: SVM range
> * @ctx: DRM GPU SVM context
> + * @dpagemap: The struct drm_pagemap representing the memory to allocate.
> *
> * Return: 0 on success, error code on failure.
> */
> -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
> - const struct drm_gpusvm_ctx *ctx)
> +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
> + struct drm_pagemap *dpagemap)
> {
> - struct drm_pagemap *dpagemap;
> -
> - xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
> + xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
> range_debug(range, "ALLOCATE VRAM");
>
> - dpagemap = xe_tile_local_pagemap(tile);
> return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
> xe_svm_range_end(range),
> range->base.gpusvm->mm,
> @@ -1778,9 +1775,9 @@ int xe_pagemap_cache_create(struct xe_tile *tile)
> return 0;
> }
>
> -int xe_svm_alloc_vram(struct xe_tile *tile,
> - struct xe_svm_range *range,
> - const struct drm_gpusvm_ctx *ctx)
> +int xe_svm_alloc_vram(struct xe_svm_range *range,
> + const struct drm_gpusvm_ctx *ctx,
> + struct drm_pagemap *dpagemap)
> {
> return -EOPNOTSUPP;
> }
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index 5adce108f7eb..c7027facf6e9 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -94,8 +94,8 @@ int xe_svm_bo_evict(struct xe_bo *bo);
>
> void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
>
> -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
> - const struct drm_gpusvm_ctx *ctx);
> +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
> + struct drm_pagemap *dpagemap);
>
> struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
> struct xe_vma *vma, struct drm_gpusvm_ctx *ctx);
> @@ -276,8 +276,8 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
> }
>
> static inline int
> -xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
> - const struct drm_gpusvm_ctx *ctx)
> +xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
> + struct drm_pagemap *dpagemap)
> {
> return -EOPNOTSUPP;
> }
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 3c3dc1b1ace9..381d4b4abac9 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2355,18 +2355,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
> if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
> dpagemap = xe_vma_resolve_pagemap(vma,
> xe_device_get_root_tile(vm->xe));
> - /*
> - * TODO: Once multigpu support is enabled will need
> - * something to dereference tile from dpagemap.
> - */
> - if (dpagemap)
> - tile = xe_device_get_root_tile(vm->xe);
> } else if (prefetch_region) {
> tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
> XE_PL_VRAM0];
> + dpagemap = xe_tile_local_pagemap(tile);
Per the kernel test robot, dpagemap needs to be initialized to NULL. There is
existing code which sets tile to NULL after a for_each_tile loop that can
also be dropped.
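Something along these lines should do it (a rough, untested sketch based on the
quoted code above; the only change is the NULL initialization):

	/* NULL means the prefetch target is smem */
	struct drm_pagemap *dpagemap = NULL;
	...
	if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
		dpagemap = xe_vma_resolve_pagemap(vma,
						  xe_device_get_root_tile(vm->xe));
	} else if (prefetch_region) {
		tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
				      XE_PL_VRAM0];
		dpagemap = xe_tile_local_pagemap(tile);
	}

	op->prefetch_range.dpagemap = dpagemap;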
Everything else looks right.
Matt
> }
>
> - op->prefetch_range.tile = tile;
> + op->prefetch_range.dpagemap = dpagemap;
> alloc_next_range:
> svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
>
> @@ -2897,7 +2892,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> {
> bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
> struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> - struct xe_tile *tile = op->prefetch_range.tile;
> + struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
> int err = 0;
>
> struct xe_svm_range *svm_range;
> @@ -2910,15 +2905,15 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> ctx.read_only = xe_vma_read_only(vma);
> ctx.devmem_possible = devmem_possible;
> ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> - ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !tile);
> + ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
>
> /* TODO: Threading the migration */
> xa_for_each(&op->prefetch_range.range, i, svm_range) {
> - if (!tile)
> + if (!dpagemap)
> xe_svm_range_migrate_to_smem(vm, svm_range);
>
> - if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
> - err = xe_svm_alloc_vram(tile, svm_range, &ctx);
> + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!dpagemap)) {
> + err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
> if (err) {
> drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 70856d536047..5313bf2afa54 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -413,10 +413,10 @@ struct xe_vma_op_prefetch_range {
> /** @ranges_count: number of svm ranges to map */
> u32 ranges_count;
> /**
> - * @tile: Pointer to the tile structure containing memory to prefetch.
> - * NULL if prefetch requested region is smem
> + * @dpagemap: Pointer to the dpagemap structure containing memory to prefetch.
> + * NULL if prefetch requested region is smem
> */
> - struct xe_tile *tile;
> + struct drm_pagemap *dpagemap;
> };
>
> /** enum xe_vma_op_flags - flags for VMA operation */
> --
> 2.51.0
>
* Re: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
2025-10-29 3:27 ` Matthew Brost
@ 2025-10-29 14:56 ` Thomas Hellström
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-29 14:56 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 20:27 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:07PM +0200, Thomas Hellström wrote:
> > Enable migrating to foreign drm_pagemaps.
> >
>
> Slightly better commit message ahead of merging.
>
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_svm.c | 39 +++++++++++++++-------------
> > ----
> > drivers/gpu/drm/xe/xe_svm.h | 8 +++----
> > drivers/gpu/drm/xe/xe_vm.c | 19 ++++++----------
> > drivers/gpu/drm/xe/xe_vm_types.h | 6 ++---
> > 4 files changed, 32 insertions(+), 40 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index d27cedeaf70c..36a6ac293e71 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -1132,9 +1132,9 @@ static int __xe_svm_handle_pagefault(struct
> > xe_vm *vm, struct xe_vma *vma,
> > if (err)
> > return err;
> >
> > - dpagemap = xe_vma_resolve_pagemap(vma, tile);
> > - ctx.device_private_page_owner =
> > - xe_svm_private_page_owner(vm, !dpagemap &&
> > !ctx.devmem_only);
> > + dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
> > + xe_vma_resolve_pagemap(vma, tile);
> > + ctx.device_private_page_owner =
> > xe_svm_private_page_owner(vm, !dpagemap);
> > range = xe_svm_range_find_or_insert(vm, fault_addr, vma,
> > &ctx);
> >
> > if (IS_ERR(range))
> > @@ -1159,13 +1159,8 @@ static int __xe_svm_handle_pagefault(struct
> > xe_vm *vm, struct xe_vma *vma,
> > xe_svm_range_needs_migrate_to_vram(range, vma,
> > !!dpagemap || ctx.devmem_only)) {
> > ktime_t migrate_start = xe_svm_stats_ktime_get();
> >
> > - /* TODO : For multi-device dpagemap will be used
> > to find the
> > - * remote tile and remote device. Will need to
> > modify
> > - * xe_svm_alloc_vram to use dpagemap for future
> > multi-device
> > - * support.
> > - */
> > xe_svm_range_migrate_count_stats_incr(gt, range);
> > - err = xe_svm_alloc_vram(tile, range, &ctx);
> > + err = xe_svm_alloc_vram(range, &ctx, dpagemap);
> > xe_svm_range_migrate_us_stats_incr(gt, range,
> > migrate_start);
> > ctx.timeslice_ms <<= 1; /* Double
> > timeslice if we have to retry */
> > if (err) {
> > @@ -1482,7 +1477,13 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct
> > xe_vm *vm, u64 start, u64 end)
> > */
> > struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma,
> > struct xe_tile *tile)
> > {
> > - s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
> > + struct drm_pagemap *dpagemap = vma-
> > >attr.preferred_loc.dpagemap;
> > + s32 fd;
> > +
> > + if (dpagemap)
> > + return dpagemap;
> > +
> > + fd = (s32)vma->attr.preferred_loc.devmem_fd;
> >
> > if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
> > return NULL;
> > @@ -1490,28 +1491,24 @@ struct drm_pagemap
> > *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> > if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
> > return IS_DGFX(tile_to_xe(tile)) ?
> > xe_tile_local_pagemap(tile) : NULL;
> >
> > - /* TODO: Support multi-device with drm_pagemap_from_fd(fd)
> > */
> > return NULL;
> > }
> >
> > /**
> > * xe_svm_alloc_vram()- Allocate device memory pages for range,
> > * migrating existing data.
> > - * @tile: tile to allocate vram from
> > * @range: SVM range
> > * @ctx: DRM GPU SVM context
> > + * @dpagemap: The struct drm_pagemap representing the memory to
> > allocate.
> > *
> > * Return: 0 on success, error code on failure.
> > */
> > -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range
> > *range,
> > - const struct drm_gpusvm_ctx *ctx)
> > +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct
> > drm_gpusvm_ctx *ctx,
> > + struct drm_pagemap *dpagemap)
> > {
> > - struct drm_pagemap *dpagemap;
> > -
> > - xe_assert(tile_to_xe(tile), range-
> > >base.pages.flags.migrate_devmem);
> > + xe_assert(range_to_vm(&range->base)->xe, range-
> > >base.pages.flags.migrate_devmem);
> > range_debug(range, "ALLOCATE VRAM");
> >
> > - dpagemap = xe_tile_local_pagemap(tile);
> > return drm_pagemap_populate_mm(dpagemap,
> > xe_svm_range_start(range),
> > xe_svm_range_end(range),
> > range->base.gpusvm->mm,
> > @@ -1778,9 +1775,9 @@ int xe_pagemap_cache_create(struct xe_tile
> > *tile)
> > return 0;
> > }
> >
> > -int xe_svm_alloc_vram(struct xe_tile *tile,
> > - struct xe_svm_range *range,
> > - const struct drm_gpusvm_ctx *ctx)
> > +int xe_svm_alloc_vram(struct xe_svm_range *range,
> > + const struct drm_gpusvm_ctx *ctx,
> > + struct drm_pagemap *dpagemap)
> > {
> > return -EOPNOTSUPP;
> > }
> > diff --git a/drivers/gpu/drm/xe/xe_svm.h
> > b/drivers/gpu/drm/xe/xe_svm.h
> > index 5adce108f7eb..c7027facf6e9 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.h
> > +++ b/drivers/gpu/drm/xe/xe_svm.h
> > @@ -94,8 +94,8 @@ int xe_svm_bo_evict(struct xe_bo *bo);
> >
> > void xe_svm_range_debug(struct xe_svm_range *range, const char
> > *operation);
> >
> > -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range
> > *range,
> > - const struct drm_gpusvm_ctx *ctx);
> > +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct
> > drm_gpusvm_ctx *ctx,
> > + struct drm_pagemap *dpagemap);
> >
> > struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm,
> > u64 addr,
> > struct xe_vma
> > *vma, struct drm_gpusvm_ctx *ctx);
> > @@ -276,8 +276,8 @@ void xe_svm_range_debug(struct xe_svm_range
> > *range, const char *operation)
> > }
> >
> > static inline int
> > -xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range
> > *range,
> > - const struct drm_gpusvm_ctx *ctx)
> > +xe_svm_alloc_vram(struct xe_svm_range *range, const struct
> > drm_gpusvm_ctx *ctx,
> > + struct drm_pagemap *dpagemap)
> > {
> > return -EOPNOTSUPP;
> > }
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > b/drivers/gpu/drm/xe/xe_vm.c
> > index 3c3dc1b1ace9..381d4b4abac9 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2355,18 +2355,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm,
> > struct xe_vma_ops *vops,
> > if (prefetch_region ==
> > DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
> > dpagemap =
> > xe_vma_resolve_pagemap(vma,
> >
> > xe_device_get_root_tile(vm->xe));
> > - /*
> > - * TODO: Once multigpu support is
> > enabled will need
> > - * something to dereference tile
> > from dpagemap.
> > - */
> > - if (dpagemap)
> > - tile =
> > xe_device_get_root_tile(vm->xe);
> > } else if (prefetch_region) {
> > tile = &vm->xe-
> > >tiles[region_to_mem_type[prefetch_region] -
> >
> > XE_PL_VRAM0];
> > + dpagemap =
> > xe_tile_local_pagemap(tile);
>
> Per the kernel test robot, dpagemap needs to be initialized to NULL.
> There is existing code which sets tile to NULL after a for_each_tile
> loop that can also be dropped.
>
Yeah I noticed that. I'll fix this up.
Thanks,
Thomas
> Everything else looks right.
>
> Matt
>
> > }
> >
> > - op->prefetch_range.tile = tile;
> > + op->prefetch_range.dpagemap = dpagemap;
> > alloc_next_range:
> > svm_range =
> > xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
> >
> > @@ -2897,7 +2892,7 @@ static int prefetch_ranges(struct xe_vm *vm,
> > struct xe_vma_op *op)
> > {
> > bool devmem_possible = IS_DGFX(vm->xe) &&
> > IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
> > struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > - struct xe_tile *tile = op->prefetch_range.tile;
> > + struct drm_pagemap *dpagemap = op-
> > >prefetch_range.dpagemap;
> > int err = 0;
> >
> > struct xe_svm_range *svm_range;
> > @@ -2910,15 +2905,15 @@ static int prefetch_ranges(struct xe_vm
> > *vm, struct xe_vma_op *op)
> > ctx.read_only = xe_vma_read_only(vma);
> > ctx.devmem_possible = devmem_possible;
> > ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> > - ctx.device_private_page_owner =
> > xe_svm_private_page_owner(vm, !tile);
> > + ctx.device_private_page_owner =
> > xe_svm_private_page_owner(vm, !dpagemap);
> >
> > /* TODO: Threading the migration */
> > xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > - if (!tile)
> > + if (!dpagemap)
> > xe_svm_range_migrate_to_smem(vm,
> > svm_range);
> >
> > - if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, !!tile)) {
> > - err = xe_svm_alloc_vram(tile, svm_range,
> > &ctx);
> > + if (xe_svm_range_needs_migrate_to_vram(svm_range,
> > vma, !!dpagemap)) {
> > + err = xe_svm_alloc_vram(svm_range, &ctx,
> > dpagemap);
> > if (err) {
> > drm_dbg(&vm->xe->drm, "VRAM
> > allocation failed, retry from userspace, asid=%u, gpusvm=%p,
> > errno=%pe\n",
> > vm->usm.asid, &vm-
> > >svm.gpusvm, ERR_PTR(err));
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index 70856d536047..5313bf2afa54 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -413,10 +413,10 @@ struct xe_vma_op_prefetch_range {
> > /** @ranges_count: number of svm ranges to map */
> > u32 ranges_count;
> > /**
> > - * @tile: Pointer to the tile structure containing memory
> > to prefetch.
> > - * NULL if prefetch requested region is smem
> > + * @dpagemap: Pointer to the dpagemap structure containing
> > memory to prefetch.
> > + * NULL if prefetch requested region is smem
> > */
> > - struct xe_tile *tile;
> > + struct drm_pagemap *dpagemap;
> > };
> >
> > /** enum xe_vma_op_flags - flags for VMA operation */
> > --
> > 2.51.0
> >
* Re: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate Thomas Hellström
2025-10-25 18:01 ` kernel test robot
2025-10-29 3:27 ` Matthew Brost
@ 2025-10-29 16:59 ` kernel test robot
2 siblings, 0 replies; 49+ messages in thread
From: kernel test robot @ 2025-10-29 16:59 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: llvm, oe-kbuild-all, Thomas Hellström, dri-devel,
himal.prasad.ghimiray, apopple, airlied, Simona Vetter,
felix.kuehling, Matthew Brost, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
Hi Thomas,
kernel test robot noticed the following build errors:
[auto build test ERROR on drm-xe/drm-xe-next]
[also build test ERROR on next-20251029]
[cannot apply to linus/master v6.18-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-pagemap-drm-xe-Add-refcounting-to-struct-drm_pagemap/20251025-200645
base: https://gitlab.freedesktop.org/drm/xe/kernel.git drm-xe-next
patch link: https://lore.kernel.org/r/20251025120412.12262-11-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate
config: i386-randconfig-005-20251029 (https://download.01.org/0day-ci/archive/20251030/202510300027.HZ9D3Ruj-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251030/202510300027.HZ9D3Ruj-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510300027.HZ9D3Ruj-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/xe/xe_vm.c:2358:15: error: variable 'dpagemap' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized]
2358 | } else if (prefetch_region) {
| ^~~~~~~~~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2364:34: note: uninitialized use occurs here
2364 | op->prefetch_range.dpagemap = dpagemap;
| ^~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2358:11: note: remove the 'if' if its condition is always true
2358 | } else if (prefetch_region) {
| ^~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/xe/xe_vm.c:2335:32: note: initialize the variable 'dpagemap' to silence this warning
2335 | struct drm_pagemap *dpagemap;
| ^
| = NULL
1 error generated.
vim +2358 drivers/gpu/drm/xe/xe_vm.c
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2242
b06d47be7c8316 Matthew Brost 2023-07-07 2243 /*
b06d47be7c8316 Matthew Brost 2023-07-07 2244 * Create operations list from IOCTL arguments, setup operations fields so parse
b06d47be7c8316 Matthew Brost 2023-07-07 2245 * and commit steps are decoupled from IOCTL arguments. This step can fail.
b06d47be7c8316 Matthew Brost 2023-07-07 2246 */
b06d47be7c8316 Matthew Brost 2023-07-07 2247 static struct drm_gpuva_ops *
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2248 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2249 struct xe_bo *bo, u64 bo_offset_or_userptr,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2250 u64 addr, u64 range,
cad4a0d6af146e Rodrigo Vivi 2023-11-22 2251 u32 operation, u32 flags,
e1fbc4f18d5b44 Matthew Auld 2023-09-25 2252 u32 prefetch_region, u16 pat_index)
dd08ebf6c3525a Matthew Brost 2023-03-30 2253 {
b06d47be7c8316 Matthew Brost 2023-07-07 2254 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
b06d47be7c8316 Matthew Brost 2023-07-07 2255 struct drm_gpuva_ops *ops;
b06d47be7c8316 Matthew Brost 2023-07-07 2256 struct drm_gpuva_op *__op;
b06d47be7c8316 Matthew Brost 2023-07-07 2257 struct drm_gpuvm_bo *vm_bo;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2258 u64 range_end = addr + range;
dd08ebf6c3525a Matthew Brost 2023-03-30 2259 int err;
dd08ebf6c3525a Matthew Brost 2023-03-30 2260
b06d47be7c8316 Matthew Brost 2023-07-07 2261 lockdep_assert_held_write(&vm->lock);
dd08ebf6c3525a Matthew Brost 2023-03-30 2262
b06d47be7c8316 Matthew Brost 2023-07-07 2263 vm_dbg(&vm->xe->drm,
b06d47be7c8316 Matthew Brost 2023-07-07 2264 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
78ddc872c6a91d Francois Dugast 2023-09-20 2265 operation, (ULL)addr, (ULL)range,
b06d47be7c8316 Matthew Brost 2023-07-07 2266 (ULL)bo_offset_or_userptr);
dd08ebf6c3525a Matthew Brost 2023-03-30 2267
78ddc872c6a91d Francois Dugast 2023-09-20 2268 switch (operation) {
d5dc73dbd148ef Francois Dugast 2023-11-14 2269 case DRM_XE_VM_BIND_OP_MAP:
000a45dce7adc1 Boris Brezillon 2025-08-19 2270 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
000a45dce7adc1 Boris Brezillon 2025-08-19 2271 struct drm_gpuvm_map_req map_req = {
000a45dce7adc1 Boris Brezillon 2025-08-19 2272 .map.va.addr = addr,
000a45dce7adc1 Boris Brezillon 2025-08-19 2273 .map.va.range = range,
000a45dce7adc1 Boris Brezillon 2025-08-19 2274 .map.gem.obj = obj,
000a45dce7adc1 Boris Brezillon 2025-08-19 2275 .map.gem.offset = bo_offset_or_userptr,
000a45dce7adc1 Boris Brezillon 2025-08-19 2276 };
000a45dce7adc1 Boris Brezillon 2025-08-19 2277
000a45dce7adc1 Boris Brezillon 2025-08-19 2278 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
b06d47be7c8316 Matthew Brost 2023-07-07 2279 break;
000a45dce7adc1 Boris Brezillon 2025-08-19 2280 }
d5dc73dbd148ef Francois Dugast 2023-11-14 2281 case DRM_XE_VM_BIND_OP_UNMAP:
b06d47be7c8316 Matthew Brost 2023-07-07 2282 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
b06d47be7c8316 Matthew Brost 2023-07-07 2283 break;
d5dc73dbd148ef Francois Dugast 2023-11-14 2284 case DRM_XE_VM_BIND_OP_PREFETCH:
b06d47be7c8316 Matthew Brost 2023-07-07 2285 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
b06d47be7c8316 Matthew Brost 2023-07-07 2286 break;
d5dc73dbd148ef Francois Dugast 2023-11-14 2287 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
c73acc1eeba5e3 Francois Dugast 2023-09-12 2288 xe_assert(vm->xe, bo);
dd08ebf6c3525a Matthew Brost 2023-03-30 2289
08a4f00e62bc96 Thomas Hellström 2023-09-08 2290 err = xe_bo_lock(bo, true);
b06d47be7c8316 Matthew Brost 2023-07-07 2291 if (err)
b06d47be7c8316 Matthew Brost 2023-07-07 2292 return ERR_PTR(err);
b06d47be7c8316 Matthew Brost 2023-07-07 2293
9d0c1c5618be02 Thomas Hellström 2023-12-22 2294 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2295 if (IS_ERR(vm_bo)) {
9d0c1c5618be02 Thomas Hellström 2023-12-22 2296 xe_bo_unlock(bo);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2297 return ERR_CAST(vm_bo);
9d0c1c5618be02 Thomas Hellström 2023-12-22 2298 }
dd08ebf6c3525a Matthew Brost 2023-03-30 2299
b06d47be7c8316 Matthew Brost 2023-07-07 2300 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
b06d47be7c8316 Matthew Brost 2023-07-07 2301 drm_gpuvm_bo_put(vm_bo);
08a4f00e62bc96 Thomas Hellström 2023-09-08 2302 xe_bo_unlock(bo);
b06d47be7c8316 Matthew Brost 2023-07-07 2303 break;
b06d47be7c8316 Matthew Brost 2023-07-07 2304 default:
5c0553cdc811bb Francois Dugast 2023-09-12 2305 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
b06d47be7c8316 Matthew Brost 2023-07-07 2306 ops = ERR_PTR(-EINVAL);
dd08ebf6c3525a Matthew Brost 2023-03-30 2307 }
40709aa761acbc Matthew Brost 2023-11-20 2308 if (IS_ERR(ops))
40709aa761acbc Matthew Brost 2023-11-20 2309 return ops;
dd08ebf6c3525a Matthew Brost 2023-03-30 2310
40709aa761acbc Matthew Brost 2023-11-20 2311 drm_gpuva_for_each_op(__op, ops) {
40709aa761acbc Matthew Brost 2023-11-20 2312 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
40709aa761acbc Matthew Brost 2023-11-20 2313
40709aa761acbc Matthew Brost 2023-11-20 2314 if (__op->op == DRM_GPUVA_OP_MAP) {
06e7139a034f26 Thomas Hellström 2024-04-23 2315 op->map.immediate =
06e7139a034f26 Thomas Hellström 2024-04-23 2316 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2317 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2318 op->map.vma_flags |= XE_VMA_READ_ONLY;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2319 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2320 op->map.vma_flags |= DRM_GPUVA_SPARSE;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2321 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2322 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
b3af8658ec70f2 Thomas Hellström 2025-10-15 2323 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
b3af8658ec70f2 Thomas Hellström 2025-10-15 2324 op->map.vma_flags |= XE_VMA_DUMPABLE;
59a2d3f38ab23c Thomas Hellström 2025-10-15 2325 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
59a2d3f38ab23c Thomas Hellström 2025-10-15 2326 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
e1fbc4f18d5b44 Matthew Auld 2023-09-25 2327 op->map.pat_index = pat_index;
5b658b7e89c312 Oak Zeng 2025-04-03 2328 op->map.invalidate_on_bind =
5b658b7e89c312 Oak Zeng 2025-04-03 2329 __xe_vm_needs_clear_scratch_pages(vm, flags);
40709aa761acbc Matthew Brost 2023-11-20 2330 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2331 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2332 struct xe_tile *tile;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2333 struct xe_svm_range *svm_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2334 struct drm_gpusvm_ctx ctx = {};
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2335 struct drm_pagemap *dpagemap;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2336 u8 id, tile_mask = 0;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2337 u32 i;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2338
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2339 if (!xe_vma_is_cpu_addr_mirror(vma)) {
40709aa761acbc Matthew Brost 2023-11-20 2340 op->prefetch.region = prefetch_region;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2341 break;
40709aa761acbc Matthew Brost 2023-11-20 2342 }
40709aa761acbc Matthew Brost 2023-11-20 2343
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2344 ctx.read_only = xe_vma_read_only(vma);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2345 ctx.devmem_possible = IS_DGFX(vm->xe) &&
4a1eaf7d110aa5 Matthew Brost 2025-07-10 2346 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2347
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2348 for_each_tile(tile, vm->xe, id)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2349 tile_mask |= 0x1 << id;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2350
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2351 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2352 op->prefetch_range.ranges_count = 0;
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2353 tile = NULL;
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2354
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2355 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2356 dpagemap = xe_vma_resolve_pagemap(vma,
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2357 xe_device_get_root_tile(vm->xe));
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 @2358 } else if (prefetch_region) {
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2359 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2360 XE_PL_VRAM0];
1b7e4275a5db37 Thomas Hellström 2025-10-25 2361 dpagemap = xe_tile_local_pagemap(tile);
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2362 }
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2363
1b7e4275a5db37 Thomas Hellström 2025-10-25 2364 op->prefetch_range.dpagemap = dpagemap;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2365 alloc_next_range:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2366 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2367
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2368 if (PTR_ERR(svm_range) == -ENOENT) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2369 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2370
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2371 addr = ret == ULONG_MAX ? 0 : ret;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2372 if (addr)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2373 goto alloc_next_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2374 else
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2375 goto print_op_label;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2376 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2377
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2378 if (IS_ERR(svm_range)) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2379 err = PTR_ERR(svm_range);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2380 goto unwind_prefetch_ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2381 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2382
c1bb69a2e8e2d5 Himal Prasad Ghimiray 2025-08-21 2383 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2384 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2385 goto check_next_range;
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2386 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2387
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2388 err = xa_alloc(&op->prefetch_range.range,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2389 &i, svm_range, xa_limit_32b,
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2390 GFP_KERNEL);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2391
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2392 if (err)
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2393 goto unwind_prefetch_ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2394
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2395 op->prefetch_range.ranges_count++;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2396 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
5aee6e33e19593 Himal Prasad Ghimiray 2025-05-13 2397 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2398 check_next_range:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2399 if (range_end > xe_svm_range_end(svm_range) &&
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2400 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2401 addr = xe_svm_range_end(svm_range);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2402 goto alloc_next_range;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2403 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2404 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2405 print_op_label:
b06d47be7c8316 Matthew Brost 2023-07-07 2406 print_op(vm->xe, __op);
40709aa761acbc Matthew Brost 2023-11-20 2407 }
b06d47be7c8316 Matthew Brost 2023-07-07 2408
b06d47be7c8316 Matthew Brost 2023-07-07 2409 return ops;
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2410
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2411 unwind_prefetch_ops:
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2412 xe_svm_prefetch_gpuva_ops_fini(ops);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2413 drm_gpuva_ops_free(&vm->gpuvm, ops);
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2414 return ERR_PTR(err);
dd08ebf6c3525a Matthew Brost 2023-03-30 2415 }
09ba0a8f06cd69 Himal Prasad Ghimiray 2025-05-13 2416
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
* [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc()
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (9 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attribute drm_pagemap to select where to migrate Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-27 23:14 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm Thomas Hellström
` (3 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Simplify madvise_preferred_mem_loc by removing repetitive patterns
in favour of local variables.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_vm_madvise.c | 21 +++++++++++----------
drivers/gpu/drm/xe/xe_vm_types.h | 2 +-
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 9553008409d1..d6f47c8e146d 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -81,21 +81,22 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);
for (i = 0; i < num_vmas; i++) {
+ struct xe_vma *vma = vmas[i];
+ struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;
+
/*TODO: Extend attributes to bo based vmas */
- if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
- vmas[i]->attr.preferred_loc.migration_policy ==
- op->preferred_mem_loc.migration_policy) ||
- !xe_vma_is_cpu_addr_mirror(vmas[i])) {
- vmas[i]->skip_invalidation = true;
+ if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
+ loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
+ !xe_vma_is_cpu_addr_mirror(vma)) {
+ vma->skip_invalidation = true;
} else {
- vmas[i]->skip_invalidation = false;
- vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
+ vma->skip_invalidation = false;
+ loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
/* Till multi-device support is not added migration_policy
* is of no use and can be ignored.
*/
- vmas[i]->attr.preferred_loc.migration_policy =
- op->preferred_mem_loc.migration_policy;
- vmas[i]->attr.preferred_loc.dpagemap = NULL;
+ loc->migration_policy = op->preferred_mem_loc.migration_policy;
+ loc->dpagemap = NULL;
}
}
}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 5313bf2afa54..a83a0bda6861 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -56,7 +56,7 @@ struct xe_vm_pgtable_update_op;
*/
struct xe_vma_mem_attr {
/** @preferred_loc: perferred memory_location */
- struct {
+ struct xe_vma_preferred_loc {
/** @preferred_loc.migration_policy: Pages migration policy */
u32 migration_policy;
--
2.51.0
* Re: [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc()
2025-10-25 12:04 ` [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc() Thomas Hellström
@ 2025-10-27 23:14 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-27 23:14 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:08PM +0200, Thomas Hellström wrote:
> Simplify madvise_preferred_mem_loc by removing repetitive patterns
> in favour of local variables.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Feel free to send a version of this patch out on drm-tip, keep the RB, and
merge it, as this change can go in ahead of the entire series.
Matt
> ---
> drivers/gpu/drm/xe/xe_vm_madvise.c | 21 +++++++++++----------
> drivers/gpu/drm/xe/xe_vm_types.h | 2 +-
> 2 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index 9553008409d1..d6f47c8e146d 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -81,21 +81,22 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
> xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);
>
> for (i = 0; i < num_vmas; i++) {
> + struct xe_vma *vma = vmas[i];
> + struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;
> +
> /*TODO: Extend attributes to bo based vmas */
> - if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
> - vmas[i]->attr.preferred_loc.migration_policy ==
> - op->preferred_mem_loc.migration_policy) ||
> - !xe_vma_is_cpu_addr_mirror(vmas[i])) {
> - vmas[i]->skip_invalidation = true;
> + if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
> + loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
> + !xe_vma_is_cpu_addr_mirror(vma)) {
> + vma->skip_invalidation = true;
> } else {
> - vmas[i]->skip_invalidation = false;
> - vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
> + vma->skip_invalidation = false;
> + loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
> /* Till multi-device support is not added migration_policy
> * is of no use and can be ignored.
> */
> - vmas[i]->attr.preferred_loc.migration_policy =
> - op->preferred_mem_loc.migration_policy;
> - vmas[i]->attr.preferred_loc.dpagemap = NULL;
> + loc->migration_policy = op->preferred_mem_loc.migration_policy;
> + loc->dpagemap = NULL;
> }
> }
> }
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 5313bf2afa54..a83a0bda6861 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -56,7 +56,7 @@ struct xe_vm_pgtable_update_op;
> */
> struct xe_vma_mem_attr {
> /** @preferred_loc: perferred memory_location */
> - struct {
> + struct xe_vma_preferred_loc {
> /** @preferred_loc.migration_policy: Pages migration policy */
> u32 migration_policy;
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (10 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc() Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-28 0:51 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
` (2 subsequent siblings)
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Use fds to represent pagemaps on foreign or local devices.
The underlying files are opened at madvise() time and remain open
as long as there are remaining madvises pointing to the
foreign pagemap.
Extend the madvise preferred_location UAPI to support the region
instance to identify the foreign placement.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 14 ++++++
drivers/gpu/drm/xe/xe_device.h | 2 +
drivers/gpu/drm/xe/xe_svm.c | 73 ++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_svm.h | 7 +++
drivers/gpu/drm/xe/xe_vm_madvise.c | 72 ++++++++++++++++++++++++-----
include/uapi/drm/xe_drm.h | 4 +-
6 files changed, 159 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ad004aab67ce..1a7502e4fc3e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -372,6 +372,20 @@ static const struct file_operations xe_driver_fops = {
.fop_flags = FOP_UNSIGNED_OFFSET,
};
+/**
+ * xe_is_xe_file() - Is the file an xe device file?
+ * @file: The file.
+ *
+ * Checks whether the file is opened against
+ * an xe device.
+ *
+ * Return: %true if an xe file, %false if not.
+ */
+bool xe_is_xe_file(const struct file *file)
+{
+ return file->f_op == &xe_driver_fops;
+}
+
static struct drm_driver driver = {
/* Don't use MTRRs here; the Xserver or userspace app should
* deal with them for Intel hardware.
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 32cc6323b7f6..475e2245c955 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -195,6 +195,8 @@ void xe_file_put(struct xe_file *xef);
int xe_is_injection_active(void);
+bool xe_is_xe_file(const struct file *file);
+
/*
* Occasionally it is seen that the G2H worker starts running after a delay of more than
* a second even after being queued and activated by the Linux workqueue subsystem. This
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 36a6ac293e71..9dd96dad2cca 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1763,6 +1763,73 @@ int xe_pagemap_cache_create(struct xe_tile *tile)
return 0;
}
+static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance)
+{
+ u32 tile_id = region_instance - 1;
+ struct xe_pagemap *xpagemap;
+ struct xe_vram_region *vr;
+
+ if (tile_id >= xe->info.tile_count)
+ return ERR_PTR(-ENOENT);
+
+ if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
+ return ERR_PTR(-ENOENT);
+
+ vr = xe_tile_to_vr(&xe->tiles[tile_id]);
+ xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
+ if (IS_ERR(xpagemap))
+ return ERR_CAST(xpagemap);
+
+ return &xpagemap->dpagemap;
+}
+
+/**
+ * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
+ * (file_descriptor, region_instance) pair.
+ * @fd: An fd opened against an xe device.
+ * @region_instance: The region instance representing the device memory
+ * on the opened xe device.
+ *
+ * Opens a struct drm_pagemap pointer on the
+ * indicated device and region_instance.
+ *
+ * Return: A reference-counted struct drm_pagemap pointer on success,
+ * negative error pointer on failure.
+ */
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ struct drm_pagemap *dpagemap;
+ struct file *file;
+ struct drm_file *fpriv;
+ struct drm_device *drm;
+ int idx;
+
+ if (fd <= 0)
+ return ERR_PTR(-EINVAL);
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-ENOENT);
+
+ if (!xe_is_xe_file(file)) {
+ dpagemap = ERR_PTR(-ENOENT);
+ goto out;
+ }
+
+ fpriv = file->private_data;
+ drm = fpriv->minor->dev;
+ if (!drm_dev_enter(drm, &idx)) {
+ dpagemap = ERR_PTR(-ENODEV);
+ goto out;
+ }
+
+ dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
+ drm_dev_exit(idx);
+out:
+ fput(file);
+ return dpagemap;
+}
+
#else
int xe_pagemap_shrinker_create(struct xe_device *xe)
@@ -1786,6 +1853,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
{
return NULL;
}
+
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ return ERR_PTR(-ENOENT);
+}
+
#endif
/**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index c7027facf6e9..7cd7932f56c8 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -187,6 +187,8 @@ int xe_pagemap_shrinker_create(struct xe_device *xe);
int xe_pagemap_cache_create(struct xe_tile *tile);
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance);
+
#else
#include <linux/interval_tree.h>
#include "xe_vm.h"
@@ -378,6 +380,11 @@ static inline int xe_pagemap_cache_create(struct xe_tile *tile)
return 0;
}
+static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ return ERR_PTR(-ENOENT);
+}
+
#define xe_svm_range_has_dma_mapping(...) false
#endif /* CONFIG_DRM_XE_GPUSVM */
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index d6f47c8e146d..d03d052fcc44 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range {
bool has_svm_userptr_vmas;
};
+/**
+ * struct xe_madvise_details - Argument to madvise_funcs
+ * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
+ *
+ * The madvise IOCTL handler may, in addition to the user-space
+ * args, have additional info to pass into the madvise_func that
+ * handles the madvise type. Use a struct_xe_madvise_details
+ * for that and extend the struct as necessary.
+ */
+struct xe_madvise_details {
+ struct drm_pagemap *dpagemap;
+};
+
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
u64 addr = madvise_range->addr;
@@ -74,7 +87,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
int i;
@@ -96,14 +110,18 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
* is of no use and can be ignored.
*/
loc->migration_policy = op->preferred_mem_loc.migration_policy;
+ drm_pagemap_put(loc->dpagemap);
loc->dpagemap = NULL;
+ if (details->dpagemap)
+ loc->dpagemap = drm_pagemap_get(details->dpagemap);
}
}
}
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
struct xe_bo *bo;
int i;
@@ -144,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
int i;
@@ -162,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op);
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details);
static const madvise_func madvise_funcs[] = {
[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
@@ -250,9 +270,6 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
return false;
- if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
- return false;
-
if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
return false;
break;
@@ -296,6 +313,31 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
return true;
}
+static int xe_madvise_details_init(struct xe_device *xe, const struct drm_xe_madvise *args,
+ struct xe_madvise_details *details)
+{
+ memset(details, 0, sizeof(*details));
+
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
+ int fd = args->preferred_mem_loc.devmem_fd;
+
+ if (fd <= 0)
+ return 0;
+
+ details->dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
+ args->preferred_mem_loc.region_instance);
+ if (XE_IOCTL_DBG(xe, IS_ERR(details->dpagemap)))
+ return PTR_ERR(details->dpagemap);
+ }
+
+ return 0;
+}
+
+static void xe_madvise_details_fini(struct xe_madvise_details *details)
+{
+ drm_pagemap_put(details->dpagemap);
+}
+
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -349,6 +391,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct drm_xe_madvise *args = data;
struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
.range = args->range, };
+ struct xe_madvise_details details;
struct xe_vm *vm;
struct drm_exec exec;
int err, attr_type;
@@ -373,13 +416,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
goto unlock_vm;
}
- err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ err = xe_madvise_details_init(xe, args, &details);
if (err)
goto unlock_vm;
+ err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ if (err)
+ goto madv_fini;
+
err = get_vmas(vm, &madvise_range);
if (err || !madvise_range.num_vmas)
- goto unlock_vm;
+ goto madv_fini;
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
@@ -387,7 +434,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
madvise_range.num_vmas,
args->atomic.val)) {
err = -EINVAL;
- goto unlock_vm;
+ goto madv_fini;
}
}
@@ -413,7 +460,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
- madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
+ madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
+ &details);
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
@@ -425,6 +473,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
drm_exec_fini(&exec);
kfree(madvise_range.vmas);
madvise_range.vmas = NULL;
+madv_fini:
+ xe_madvise_details_fini(&details);
unlock_vm:
up_write(&vm->lock);
put_vm:
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 47853659a705..c79de1019816 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -2079,8 +2079,8 @@ struct drm_xe_madvise {
/** @preferred_mem_loc.migration_policy: Page migration policy */
__u16 migration_policy;
- /** @preferred_mem_loc.pad : MBZ */
- __u16 pad;
+ /** @preferred_mem_loc.region_instance : Region instance */
+ __u16 region_instance;
/** @preferred_mem_loc.reserved : Reserved */
__u64 reserved;
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm
2025-10-25 12:04 ` [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm Thomas Hellström
@ 2025-10-28 0:51 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 0:51 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:09PM +0200, Thomas Hellström wrote:
> Use fds to represent pagemaps on foreign or local devices.
> The underlying files are opened at madvise() time and remain open
> as long as there are remaining madvises pointing to the
> foreign pagemap.
>
> Extend the madvise preferred_location UAPI to support the region
> instance to identify the foreign placement.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.c | 14 ++++++
> drivers/gpu/drm/xe/xe_device.h | 2 +
> drivers/gpu/drm/xe/xe_svm.c | 73 ++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_svm.h | 7 +++
> drivers/gpu/drm/xe/xe_vm_madvise.c | 72 ++++++++++++++++++++++++-----
> include/uapi/drm/xe_drm.h | 4 +-
> 6 files changed, 159 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index ad004aab67ce..1a7502e4fc3e 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -372,6 +372,20 @@ static const struct file_operations xe_driver_fops = {
> .fop_flags = FOP_UNSIGNED_OFFSET,
> };
>
> +/**
> + * xe_is_xe_file() - Is the file an xe device file?
> + * @file: The file.
> + *
> + * Checks whether the file is opened against
> + * an xe device.
> + *
> + * Return: %true if an xe file, %false if not.
> + */
> +bool xe_is_xe_file(const struct file *file)
> +{
> + return file->f_op == &xe_driver_fops;
> +}
> +
> static struct drm_driver driver = {
> /* Don't use MTRRs here; the Xserver or userspace app should
> * deal with them for Intel hardware.
> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
> index 32cc6323b7f6..475e2245c955 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -195,6 +195,8 @@ void xe_file_put(struct xe_file *xef);
>
> int xe_is_injection_active(void);
>
> +bool xe_is_xe_file(const struct file *file);
> +
> /*
> * Occasionally it is seen that the G2H worker starts running after a delay of more than
> * a second even after being queued and activated by the Linux workqueue subsystem. This
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 36a6ac293e71..9dd96dad2cca 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -1763,6 +1763,73 @@ int xe_pagemap_cache_create(struct xe_tile *tile)
> return 0;
> }
>
> +static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance)
> +{
> + u32 tile_id = region_instance - 1;
> + struct xe_pagemap *xpagemap;
> + struct xe_vram_region *vr;
> +
> + if (tile_id >= xe->info.tile_count)
> + return ERR_PTR(-ENOENT);
> +
> + if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
> + return ERR_PTR(-ENOENT);
> +
> + vr = xe_tile_to_vr(&xe->tiles[tile_id]);
> + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
This is from a different patch, but I was trying to trace where the
reference drop to the drm_pagemap in xe_madvise_details_fini comes from.
I figured out it was from the function above, but I didn’t see anything
in the kernel documentation for xe_pagemap_find_or_create indicating
that it takes a reference to the drm_pagemap.
I’d suggest adding that for completeness.
> + if (IS_ERR(xpagemap))
> + return ERR_CAST(xpagemap);
> +
> + return &xpagemap->dpagemap;
> +}
> +
> +/**
> + * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
> + * (file_descriptor, region_instance) pair.
> + * @fd: An fd opened against an xe device.
> + * @region_instance: The region instance representing the device memory
> + * on the opened xe device.
> + *
> + * Opens a struct drm_pagemap pointer on the
> + * indicated device and region_instance.
> + *
> + * Return: A reference-counted struct drm_pagemap pointer on success,
> + * negative error pointer on failure.
> + */
> +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
> +{
> + struct drm_pagemap *dpagemap;
> + struct file *file;
> + struct drm_file *fpriv;
> + struct drm_device *drm;
> + int idx;
> +
> + if (fd <= 0)
> + return ERR_PTR(-EINVAL);
> +
> + file = fget(fd);
> + if (!file)
> + return ERR_PTR(-ENOENT);
> +
> + if (!xe_is_xe_file(file)) {
> + dpagemap = ERR_PTR(-ENOENT);
> + goto out;
> + }
> +
> + fpriv = file->private_data;
> + drm = fpriv->minor->dev;
> + if (!drm_dev_enter(drm, &idx)) {
> + dpagemap = ERR_PTR(-ENODEV);
> + goto out;
> + }
> +
> + dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
> + drm_dev_exit(idx);
> +out:
> + fput(file);
> + return dpagemap;
> +}
> +
> #else
>
> int xe_pagemap_shrinker_create(struct xe_device *xe)
> @@ -1786,6 +1853,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> {
> return NULL;
> }
> +
> +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
> +{
> + return ERR_PTR(-ENOENT);
> +}
> +
> #endif
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index c7027facf6e9..7cd7932f56c8 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -187,6 +187,8 @@ int xe_pagemap_shrinker_create(struct xe_device *xe);
>
> int xe_pagemap_cache_create(struct xe_tile *tile);
>
> +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance);
> +
> #else
> #include <linux/interval_tree.h>
> #include "xe_vm.h"
> @@ -378,6 +380,11 @@ static inline int xe_pagemap_cache_create(struct xe_tile *tile)
> return 0;
> }
>
> +static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
> +{
> + return ERR_PTR(-ENOENT);
> +}
> +
> #define xe_svm_range_has_dma_mapping(...) false
> #endif /* CONFIG_DRM_XE_GPUSVM */
>
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index d6f47c8e146d..d03d052fcc44 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range {
> bool has_svm_userptr_vmas;
> };
>
> +/**
> + * struct xe_madvise_details - Argument to madvise_funcs
> + * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
> + *
> + * The madvise IOCTL handler may, in addition to the user-space
> + * args, have additional info to pass into the madvise_func that
> + * handles the madvise type. Use a struct_xe_madvise_details
> + * for that and extend the struct as necessary.
> + */
> +struct xe_madvise_details {
> + struct drm_pagemap *dpagemap;
> +};
> +
> static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
> {
> u64 addr = madvise_range->addr;
> @@ -74,7 +87,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r
>
> static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
> struct xe_vma **vmas, int num_vmas,
> - struct drm_xe_madvise *op)
> + struct drm_xe_madvise *op,
> + struct xe_madvise_details *details)
> {
> int i;
>
> @@ -96,14 +110,18 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
> * is of no use and can be ignored.
> */
> loc->migration_policy = op->preferred_mem_loc.migration_policy;
> + drm_pagemap_put(loc->dpagemap);
> loc->dpagemap = NULL;
> + if (details->dpagemap)
> + loc->dpagemap = drm_pagemap_get(details->dpagemap);
> }
> }
> }
>
> static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
> struct xe_vma **vmas, int num_vmas,
> - struct drm_xe_madvise *op)
> + struct drm_xe_madvise *op,
> + struct xe_madvise_details *details)
> {
> struct xe_bo *bo;
> int i;
> @@ -144,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
>
> static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
> struct xe_vma **vmas, int num_vmas,
> - struct drm_xe_madvise *op)
> + struct drm_xe_madvise *op,
> + struct xe_madvise_details *details)
> {
> int i;
>
> @@ -162,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
>
> typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
> struct xe_vma **vmas, int num_vmas,
> - struct drm_xe_madvise *op);
> + struct drm_xe_madvise *op,
> + struct xe_madvise_details *details);
>
> static const madvise_func madvise_funcs[] = {
> [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
> @@ -250,9 +270,6 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
> DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
> return false;
>
> - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
> - return false;
> -
Should we still reject region_instance if fd <= 0?
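Something like this (untested sketch) would keep that combination rejected:

	if (XE_IOCTL_DBG(xe, (int)args->preferred_mem_loc.devmem_fd <= 0 &&
			 args->preferred_mem_loc.region_instance))
		return false;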
> if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
> return false;
> break;
> @@ -296,6 +313,31 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
> return true;
> }
>
> +static int xe_madvise_details_init(struct xe_device *xe, const struct drm_xe_madvise *args,
> + struct xe_madvise_details *details)
> +{
> + memset(details, 0, sizeof(*details));
> +
> + if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
> + int fd = args->preferred_mem_loc.devmem_fd;
> +
> + if (fd <= 0)
> + return 0;
> +
I think you need to sanitize 'args->preferred_mem_loc.region_instance'
somewhere and reject 0 (system memory), or xe_devmem_open will blow up as
tile_id will be -1 in that function.
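E.g. something like this after the fd <= 0 check in xe_madvise_details_init()
(untested):

	if (XE_IOCTL_DBG(xe, !args->preferred_mem_loc.region_instance))
		return -EINVAL;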
> + details->dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
> + args->preferred_mem_loc.region_instance);
You have a local fd variable here, but don't use it. Should we also have
a local region_instance to avoid big line wraps?
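I.e. something like (untested):

	int fd = args->preferred_mem_loc.devmem_fd;
	u32 region_instance = args->preferred_mem_loc.region_instance;

	if (fd <= 0)
		return 0;

	details->dpagemap = xe_drm_pagemap_from_fd(fd, region_instance);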
> + if (XE_IOCTL_DBG(xe, IS_ERR(details->dpagemap)))
> + return PTR_ERR(details->dpagemap);
> + }
> +
> + return 0;
> +}
> +
> +static void xe_madvise_details_fini(struct xe_madvise_details *details)
> +{
> + drm_pagemap_put(details->dpagemap);
> +}
> +
> static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
> int num_vmas, u32 atomic_val)
> {
> @@ -349,6 +391,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
> struct drm_xe_madvise *args = data;
> struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
> .range = args->range, };
> + struct xe_madvise_details details;
> struct xe_vm *vm;
> struct drm_exec exec;
> int err, attr_type;
> @@ -373,13 +416,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
> goto unlock_vm;
> }
>
> - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
> + err = xe_madvise_details_init(xe, args, &details);
> if (err)
> goto unlock_vm;
>
> + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
> + if (err)
> + goto madv_fini;
> +
> err = get_vmas(vm, &madvise_range);
> if (err || !madvise_range.num_vmas)
> - goto unlock_vm;
> + goto madv_fini;
>
> if (madvise_range.has_bo_vmas) {
> if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
> @@ -387,7 +434,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
> madvise_range.num_vmas,
> args->atomic.val)) {
> err = -EINVAL;
> - goto unlock_vm;
> + goto madv_fini;
> }
> }
>
> @@ -413,7 +460,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
> }
>
> attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
> - madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
> + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
> + &details);
>
> err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
>
> @@ -425,6 +473,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
> drm_exec_fini(&exec);
> kfree(madvise_range.vmas);
> madvise_range.vmas = NULL;
> +madv_fini:
> + xe_madvise_details_fini(&details);
> unlock_vm:
> up_write(&vm->lock);
> put_vm:
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 47853659a705..c79de1019816 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -2079,8 +2079,8 @@ struct drm_xe_madvise {
> /** @preferred_mem_loc.migration_policy: Page migration policy */
> __u16 migration_policy;
>
> - /** @preferred_mem_loc.pad : MBZ */
> - __u16 pad;
> + /** @preferred_mem_loc.region_instance : Region instance */
> + __u16 region_instance;
I'd mention here that this field is only relevant if devmem_fd > 0, and perhaps
a little more about its usage. Also perhaps system memory regions are not
allowed (assuming we land on that).
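Perhaps something like this for the kernel-doc (wording only a suggestion):

	/**
	 * @preferred_mem_loc.region_instance : Device memory region instance
	 * on the device identified by @preferred_mem_loc.devmem_fd. Only used
	 * when devmem_fd > 0; system memory regions are not allowed.
	 */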
Matt
>
> /** @preferred_mem_loc.reserved : Reserved */
> __u64 reserved;
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (11 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-28 1:14 ` Matthew Brost
2025-10-29 2:17 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout Thomas Hellström
2025-10-25 12:04 ` [PATCH 15/15] drm/xe: Retry migration once Thomas Hellström
14 siblings, 2 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Mimic the dma-buf method using dma_[map|unmap]_resource to map
for pcie-p2p dma.
There's an ongoing area of work upstream to sort out how this best
should be done. One method proposed is to add an additional
pci_p2p_dma_pagemap aliasing the device_private pagemap and use
the corresponding pci_p2p_dma_pagemap page as input for
dma_map_page(). However, that would incur double the amount of
memory and latency to set up the drm_pagemap and given the huge
amount of memory present on modern GPUs, that would really not work.
Hence the simple approach used in this patch.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 44 ++++++++++++++++++++++++++++++++++---
drivers/gpu/drm/xe/xe_svm.h | 1 +
2 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 9dd96dad2cca..9814f95cb212 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -3,6 +3,8 @@
* Copyright © 2024 Intel Corporation
*/
+#include <linux/pci-p2pdma.h>
+
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_pagemap.h>
@@ -442,6 +444,24 @@ static u64 xe_page_to_dpa(struct page *page)
return dpa;
}
+static u64 xe_page_to_pcie(struct page *page)
+{
+ struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
+ u64 hpa_base = xpagemap->hpa_base;
+ u64 ioaddr;
+ u64 pfn = page_to_pfn(page);
+ u64 offset;
+
+ xe_assert(vr->xe, is_device_private_page(page));
+ xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
+
+ offset = (pfn << PAGE_SHIFT) - hpa_base;
+ ioaddr = vr->io_start + offset;
+
+ return ioaddr;
+}
+
enum xe_svm_copy_dir {
XE_SVM_COPY_TO_VRAM,
XE_SVM_COPY_TO_SRAM,
@@ -793,7 +813,10 @@ static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
struct device *dev1 = xe_peer_to_dev(peer1);
struct device *dev2 = xe_peer_to_dev(peer2);
- return dev1 == dev2;
+ if (dev1 == dev2)
+ return true;
+
+ return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
}
static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
@@ -1530,13 +1553,27 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
addr = xe_page_to_dpa(page);
prot = XE_INTERCONNECT_VRAM;
} else {
- addr = DMA_MAPPING_ERROR;
- prot = 0;
+ addr = dma_map_resource(dev,
+ xe_page_to_pcie(page),
+ PAGE_SIZE << order, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ prot = XE_INTERCONNECT_P2P;
}
return drm_pagemap_addr_encode(addr, prot, order, dir);
}
+static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
+ struct device *dev,
+ struct drm_pagemap_addr addr)
+{
+ if (addr.proto != XE_INTERCONNECT_P2P)
+ return;
+
+ dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order,
+ addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
static void xe_pagemap_destroy_work(struct work_struct *work)
{
struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
@@ -1573,6 +1610,7 @@ static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or
static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.device_map = xe_drm_pagemap_device_map,
+ .device_unmap = xe_drm_pagemap_device_unmap,
.populate_mm = xe_drm_pagemap_populate_mm,
.destroy = xe_pagemap_destroy,
};
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 7cd7932f56c8..f5ed48993b6d 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -13,6 +13,7 @@
#include <drm/drm_pagemap_util.h>
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
+#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
struct drm_device;
struct drm_file;
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect
2025-10-25 12:04 ` [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
@ 2025-10-28 1:14 ` Matthew Brost
2025-10-28 9:32 ` Thomas Hellström
2025-10-29 2:17 ` Matthew Brost
1 sibling, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 1:14 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:10PM +0200, Thomas Hellström wrote:
> Mimic the dma-buf method using dma_[map|unmap]_resource to map
> for pcie-p2p dma.
>
> There's an ongoing area of work upstream to sort out how this best
> should be done. One method proposed is to add an additional
> pci_p2p_dma_pagemap aliasing the device_private pagemap and use
> the corresponding pci_p2p_dma_pagemap page as input for
> dma_map_page(). However, that would incur double the amount of
> memory and latency to set up the drm_pagemap and given the huge
> amount of memory present on modern GPUs, that would really not work.
> Hence the simple approach used in this patch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 44 ++++++++++++++++++++++++++++++++++---
> drivers/gpu/drm/xe/xe_svm.h | 1 +
> 2 files changed, 42 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 9dd96dad2cca..9814f95cb212 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -3,6 +3,8 @@
> * Copyright © 2024 Intel Corporation
> */
>
> +#include <linux/pci-p2pdma.h>
> +
> #include <drm/drm_drv.h>
> #include <drm/drm_managed.h>
> #include <drm/drm_pagemap.h>
> @@ -442,6 +444,24 @@ static u64 xe_page_to_dpa(struct page *page)
> return dpa;
> }
>
> +static u64 xe_page_to_pcie(struct page *page)
> +{
> + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
> + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> + u64 hpa_base = xpagemap->hpa_base;
> + u64 ioaddr;
> + u64 pfn = page_to_pfn(page);
> + u64 offset;
> +
> + xe_assert(vr->xe, is_device_private_page(page));
> + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
> +
> + offset = (pfn << PAGE_SHIFT) - hpa_base;
> + ioaddr = vr->io_start + offset;
> +
> + return ioaddr;
> +}
> +
> enum xe_svm_copy_dir {
> XE_SVM_COPY_TO_VRAM,
> XE_SVM_COPY_TO_SRAM,
> @@ -793,7 +813,10 @@ static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
> struct device *dev1 = xe_peer_to_dev(peer1);
> struct device *dev2 = xe_peer_to_dev(peer2);
>
> - return dev1 == dev2;
> + if (dev1 == dev2)
> + return true;
> +
> + return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
> }
>
> static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
> @@ -1530,13 +1553,27 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
This relates to my comment here [1]. Perhaps this is where we should
build in the “map for atomic” logic and route it through get_pages? I
think that could work quite nicely and wouldn’t require an additional
“do these two page maps support atomics?” check.
What do you think?
Matt
[1] https://patchwork.freedesktop.org/patch/683511/?series=156525&rev=1#comment_1255409
> addr = xe_page_to_dpa(page);
> prot = XE_INTERCONNECT_VRAM;
> } else {
> - addr = DMA_MAPPING_ERROR;
> - prot = 0;
> + addr = dma_map_resource(dev,
> + xe_page_to_pcie(page),
> + PAGE_SIZE << order, dir,
> + DMA_ATTR_SKIP_CPU_SYNC);
> + prot = XE_INTERCONNECT_P2P;
> }
>
> return drm_pagemap_addr_encode(addr, prot, order, dir);
> }
>
> +static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
> + struct device *dev,
> + struct drm_pagemap_addr addr)
> +{
> + if (addr.proto != XE_INTERCONNECT_P2P)
> + return;
> +
> + dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order,
> + addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
> +}
> +
> static void xe_pagemap_destroy_work(struct work_struct *work)
> {
> struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
> @@ -1573,6 +1610,7 @@ static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or
>
> static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> .device_map = xe_drm_pagemap_device_map,
> + .device_unmap = xe_drm_pagemap_device_unmap,
> .populate_mm = xe_drm_pagemap_populate_mm,
> .destroy = xe_pagemap_destroy,
> };
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index 7cd7932f56c8..f5ed48993b6d 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -13,6 +13,7 @@
> #include <drm/drm_pagemap_util.h>
>
> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
> +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
>
> struct drm_device;
> struct drm_file;
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect
2025-10-28 1:14 ` Matthew Brost
@ 2025-10-28 9:32 ` Thomas Hellström
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-28 9:32 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Mon, 2025-10-27 at 18:14 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:10PM +0200, Thomas Hellström wrote:
> > Mimic the dma-buf method using dma_[map|unmap]_resource to map
> > for pcie-p2p dma.
> >
> > There's an ongoing area of work upstream to sort out how this best
> > should be done. One method proposed is to add an additional
> > pci_p2p_dma_pagemap aliasing the device_private pagemap and use
> > the corresponding pci_p2p_dma_pagemap page as input for
> > dma_map_page(). However, that would incur double the amount of
> > memory and latency to set up the drm_pagemap and given the huge
> > amount of memory present on modern GPUs, that would really not
> > work.
> > Hence the simple approach used in this patch.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_svm.c | 44
> > ++++++++++++++++++++++++++++++++++---
> > drivers/gpu/drm/xe/xe_svm.h | 1 +
> > 2 files changed, 42 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index 9dd96dad2cca..9814f95cb212 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -3,6 +3,8 @@
> > * Copyright © 2024 Intel Corporation
> > */
> >
> > +#include <linux/pci-p2pdma.h>
> > +
> > #include <drm/drm_drv.h>
> > #include <drm/drm_managed.h>
> > #include <drm/drm_pagemap.h>
> > @@ -442,6 +444,24 @@ static u64 xe_page_to_dpa(struct page *page)
> > return dpa;
> > }
> >
> > +static u64 xe_page_to_pcie(struct page *page)
> > +{
> > + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
> > + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> > + u64 hpa_base = xpagemap->hpa_base;
> > + u64 ioaddr;
> > + u64 pfn = page_to_pfn(page);
> > + u64 offset;
> > +
> > + xe_assert(vr->xe, is_device_private_page(page));
> > + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
> > +
> > + offset = (pfn << PAGE_SHIFT) - hpa_base;
> > + ioaddr = vr->io_start + offset;
> > +
> > + return ioaddr;
> > +}
> > +
> > enum xe_svm_copy_dir {
> > XE_SVM_COPY_TO_VRAM,
> > XE_SVM_COPY_TO_SRAM,
> > @@ -793,7 +813,10 @@ static bool xe_has_interconnect(struct
> > drm_pagemap_peer *peer1,
> > struct device *dev1 = xe_peer_to_dev(peer1);
> > struct device *dev2 = xe_peer_to_dev(peer2);
> >
> > - return dev1 == dev2;
> > + if (dev1 == dev2)
> > + return true;
> > +
> > + return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true)
> > >= 0;
> > }
> >
> > static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
> > @@ -1530,13 +1553,27 @@ xe_drm_pagemap_device_map(struct
> > drm_pagemap *dpagemap,
>
> This relates to my comment here [1]. Perhaps this is where we should
> build in the “map for atomic” logic and route it through get_pages? I
> think that could work quite nicely and wouldn’t require an additional
> “do these two page maps support atomics?” check.
> What do you think?
I think initially we should just make sure we use a local dpagemap for
atomic. When we implement atomic-supporting fast interconnects we could
look at something like this. In the migrate code we could easily
migrate what's needed for atomic support. For get_pages(),
we also need a check, but wouldn't it then be sufficient to verify that
all pages in a range are from the same dpagemap, like we do today and
in addition verify that the dpagemap in question supports atomic
operations?
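Roughly something like the below, where the two helpers are made up just to
illustrate the check:

	if (ctx->devmem_only &&
	    (!xe_svm_range_in_single_dpagemap(range, &dpagemap) ||
	     !dpagemap_supports_atomics(dpagemap)))
		/* evict and retry migration */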
/Thomas
>
> Matt
>
> [1]
> https://patchwork.freedesktop.org/patch/683511/?series=156525&rev=1#comment_1255409
>
> > addr = xe_page_to_dpa(page);
> > prot = XE_INTERCONNECT_VRAM;
> > } else {
> > - addr = DMA_MAPPING_ERROR;
> > - prot = 0;
> > + addr = dma_map_resource(dev,
> > + xe_page_to_pcie(page),
> > + PAGE_SIZE << order, dir,
> > + DMA_ATTR_SKIP_CPU_SYNC);
> > + prot = XE_INTERCONNECT_P2P;
> > }
> >
> > return drm_pagemap_addr_encode(addr, prot, order, dir);
> > }
> >
> > +static void xe_drm_pagemap_device_unmap(struct drm_pagemap
> > *dpagemap,
> > + struct device *dev,
> > + struct drm_pagemap_addr
> > addr)
> > +{
> > + if (addr.proto != XE_INTERCONNECT_P2P)
> > + return;
> > +
> > + dma_unmap_resource(dev, addr.addr, PAGE_SIZE <<
> > addr.order,
> > + addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
> > +}
> > +
> > static void xe_pagemap_destroy_work(struct work_struct *work)
> > {
> > struct xe_pagemap *xpagemap = container_of(work,
> > typeof(*xpagemap), destroy_work);
> > @@ -1573,6 +1610,7 @@ static void xe_pagemap_destroy(struct
> > drm_pagemap *dpagemap, bool from_atomic_or
> >
> > static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> > .device_map = xe_drm_pagemap_device_map,
> > + .device_unmap = xe_drm_pagemap_device_unmap,
> > .populate_mm = xe_drm_pagemap_populate_mm,
> > .destroy = xe_pagemap_destroy,
> > };
> > diff --git a/drivers/gpu/drm/xe/xe_svm.h
> > b/drivers/gpu/drm/xe/xe_svm.h
> > index 7cd7932f56c8..f5ed48993b6d 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.h
> > +++ b/drivers/gpu/drm/xe/xe_svm.h
> > @@ -13,6 +13,7 @@
> > #include <drm/drm_pagemap_util.h>
> >
> > #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
> > +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
> >
> > struct drm_device;
> > struct drm_file;
> > --
> > 2.51.0
> >
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect
2025-10-25 12:04 ` [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
2025-10-28 1:14 ` Matthew Brost
@ 2025-10-29 2:17 ` Matthew Brost
2025-10-29 14:54 ` Thomas Hellström
1 sibling, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-29 2:17 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:10PM +0200, Thomas Hellström wrote:
> Mimic the dma-buf method using dma_[map|unmap]_resource to map
> for pcie-p2p dma.
>
> There's an ongoing area of work upstream to sort out how this best
> should be done. One method proposed is to add an additional
> pci_p2p_dma_pagemap aliasing the device_private pagemap and use
> the corresponding pci_p2p_dma_pagemap page as input for
> dma_map_page(). However, that would incur double the amount of
> memory and latency to set up the drm_pagemap and given the huge
> amount of memory present on modern GPUs, that would really not work.
> Hence the simple approach used in this patch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 44 ++++++++++++++++++++++++++++++++++---
> drivers/gpu/drm/xe/xe_svm.h | 1 +
> 2 files changed, 42 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 9dd96dad2cca..9814f95cb212 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -3,6 +3,8 @@
> * Copyright © 2024 Intel Corporation
> */
>
> +#include <linux/pci-p2pdma.h>
> +
> #include <drm/drm_drv.h>
> #include <drm/drm_managed.h>
> #include <drm/drm_pagemap.h>
> @@ -442,6 +444,24 @@ static u64 xe_page_to_dpa(struct page *page)
> return dpa;
> }
>
> +static u64 xe_page_to_pcie(struct page *page)
> +{
This function looks almost exactly the same as xe_page_to_dpa, maybe
extract out the common parts?
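Something like this might work (untested, just reusing what xe_page_to_pcie()
already computes):

	static u64 xe_page_to_vram_offset(struct page *page,
					  struct xe_vram_region **vr)
	{
		struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
		u64 pfn = page_to_pfn(page);

		*vr = xe_pagemap_to_vr(xpagemap);
		xe_assert((*vr)->xe, is_device_private_page(page));
		xe_assert((*vr)->xe, (pfn << PAGE_SHIFT) >= xpagemap->hpa_base);

		return (pfn << PAGE_SHIFT) - xpagemap->hpa_base;
	}

	static u64 xe_page_to_pcie(struct page *page)
	{
		struct xe_vram_region *vr;
		u64 offset = xe_page_to_vram_offset(page, &vr);

		return vr->io_start + offset;
	}

with xe_page_to_dpa() doing the same but adding its device base instead of
vr->io_start.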
Everything else LGTM.
Matt
> + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
> + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> + u64 hpa_base = xpagemap->hpa_base;
> + u64 ioaddr;
> + u64 pfn = page_to_pfn(page);
> + u64 offset;
> +
> + xe_assert(vr->xe, is_device_private_page(page));
> + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
> +
> + offset = (pfn << PAGE_SHIFT) - hpa_base;
> + ioaddr = vr->io_start + offset;
> +
> + return ioaddr;
> +}
> +
> enum xe_svm_copy_dir {
> XE_SVM_COPY_TO_VRAM,
> XE_SVM_COPY_TO_SRAM,
> @@ -793,7 +813,10 @@ static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
> struct device *dev1 = xe_peer_to_dev(peer1);
> struct device *dev2 = xe_peer_to_dev(peer2);
>
> - return dev1 == dev2;
> + if (dev1 == dev2)
> + return true;
> +
> + return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
> }
>
> static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
> @@ -1530,13 +1553,27 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
> addr = xe_page_to_dpa(page);
> prot = XE_INTERCONNECT_VRAM;
> } else {
> - addr = DMA_MAPPING_ERROR;
> - prot = 0;
> + addr = dma_map_resource(dev,
> + xe_page_to_pcie(page),
> + PAGE_SIZE << order, dir,
> + DMA_ATTR_SKIP_CPU_SYNC);
> + prot = XE_INTERCONNECT_P2P;
> }
>
> return drm_pagemap_addr_encode(addr, prot, order, dir);
> }
>
> +static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
> + struct device *dev,
> + struct drm_pagemap_addr addr)
> +{
> + if (addr.proto != XE_INTERCONNECT_P2P)
> + return;
> +
> + dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order,
> + addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
> +}
> +
> static void xe_pagemap_destroy_work(struct work_struct *work)
> {
> struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
> @@ -1573,6 +1610,7 @@ static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or
>
> static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> .device_map = xe_drm_pagemap_device_map,
> + .device_unmap = xe_drm_pagemap_device_unmap,
> .populate_mm = xe_drm_pagemap_populate_mm,
> .destroy = xe_pagemap_destroy,
> };
> diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
> index 7cd7932f56c8..f5ed48993b6d 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -13,6 +13,7 @@
> #include <drm/drm_pagemap_util.h>
>
> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
> +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
>
> struct drm_device;
> struct drm_file;
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect
2025-10-29 2:17 ` Matthew Brost
@ 2025-10-29 14:54 ` Thomas Hellström
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Hellström @ 2025-10-29 14:54 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, 2025-10-28 at 19:17 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:10PM +0200, Thomas Hellström wrote:
> > Mimic the dma-buf method using dma_[map|unmap]_resource to map
> > for pcie-p2p dma.
> >
> > There's an ongoing area of work upstream to sort out how this best
> > should be done. One method proposed is to add an additional
> > pci_p2p_dma_pagemap aliasing the device_private pagemap and use
> > the corresponding pci_p2p_dma_pagemap page as input for
> > dma_map_page(). However, that would incur double the amount of
> > memory and latency to set up the drm_pagemap and given the huge
> > amount of memory present on modern GPUs, that would really not
> > work.
> > Hence the simple approach used in this patch.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_svm.c | 44
> > ++++++++++++++++++++++++++++++++++---
> > drivers/gpu/drm/xe/xe_svm.h | 1 +
> > 2 files changed, 42 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index 9dd96dad2cca..9814f95cb212 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -3,6 +3,8 @@
> > * Copyright © 2024 Intel Corporation
> > */
> >
> > +#include <linux/pci-p2pdma.h>
> > +
> > #include <drm/drm_drv.h>
> > #include <drm/drm_managed.h>
> > #include <drm/drm_pagemap.h>
> > @@ -442,6 +444,24 @@ static u64 xe_page_to_dpa(struct page *page)
> > return dpa;
> > }
> >
> > +static u64 xe_page_to_pcie(struct page *page)
> > +{
>
> This function looks almost exactly the same as xe_page_to_dpa, maybe
> extract out the common parts?
OK, I'll take a look at that.
/Thomas
>
> Everything else LGTM.
>
> Matt
>
> > + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
> > + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
> > + u64 hpa_base = xpagemap->hpa_base;
> > + u64 ioaddr;
> > + u64 pfn = page_to_pfn(page);
> > + u64 offset;
> > +
> > + xe_assert(vr->xe, is_device_private_page(page));
> > + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
> > +
> > + offset = (pfn << PAGE_SHIFT) - hpa_base;
> > + ioaddr = vr->io_start + offset;
> > +
> > + return ioaddr;
> > +}
> > +
> > enum xe_svm_copy_dir {
> > XE_SVM_COPY_TO_VRAM,
> > XE_SVM_COPY_TO_SRAM,
> > @@ -793,7 +813,10 @@ static bool xe_has_interconnect(struct
> > drm_pagemap_peer *peer1,
> > struct device *dev1 = xe_peer_to_dev(peer1);
> > struct device *dev2 = xe_peer_to_dev(peer2);
> >
> > - return dev1 == dev2;
> > + if (dev1 == dev2)
> > + return true;
> > +
> > + return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true)
> > >= 0;
> > }
> >
> > static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
> > @@ -1530,13 +1553,27 @@ xe_drm_pagemap_device_map(struct
> > drm_pagemap *dpagemap,
> > addr = xe_page_to_dpa(page);
> > prot = XE_INTERCONNECT_VRAM;
> > } else {
> > - addr = DMA_MAPPING_ERROR;
> > - prot = 0;
> > + addr = dma_map_resource(dev,
> > + xe_page_to_pcie(page),
> > + PAGE_SIZE << order, dir,
> > + DMA_ATTR_SKIP_CPU_SYNC);
> > + prot = XE_INTERCONNECT_P2P;
> > }
> >
> > return drm_pagemap_addr_encode(addr, prot, order, dir);
> > }
> >
> > +static void xe_drm_pagemap_device_unmap(struct drm_pagemap
> > *dpagemap,
> > + struct device *dev,
> > + struct drm_pagemap_addr
> > addr)
> > +{
> > + if (addr.proto != XE_INTERCONNECT_P2P)
> > + return;
> > +
> > + dma_unmap_resource(dev, addr.addr, PAGE_SIZE <<
> > addr.order,
> > + addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
> > +}
> > +
> > static void xe_pagemap_destroy_work(struct work_struct *work)
> > {
> > struct xe_pagemap *xpagemap = container_of(work,
> > typeof(*xpagemap), destroy_work);
> > @@ -1573,6 +1610,7 @@ static void xe_pagemap_destroy(struct
> > drm_pagemap *dpagemap, bool from_atomic_or
> >
> > static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
> > .device_map = xe_drm_pagemap_device_map,
> > + .device_unmap = xe_drm_pagemap_device_unmap,
> > .populate_mm = xe_drm_pagemap_populate_mm,
> > .destroy = xe_pagemap_destroy,
> > };
> > diff --git a/drivers/gpu/drm/xe/xe_svm.h
> > b/drivers/gpu/drm/xe/xe_svm.h
> > index 7cd7932f56c8..f5ed48993b6d 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.h
> > +++ b/drivers/gpu/drm/xe/xe_svm.h
> > @@ -13,6 +13,7 @@
> > #include <drm/drm_pagemap_util.h>
> >
> > #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
> > +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
> >
> > struct drm_device;
> > struct drm_file;
> > --
> > 2.51.0
> >
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (12 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-27 23:16 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 15/15] drm/xe: Retry migration once Thomas Hellström
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Add a debug printout that is valuable for pagemap prefetch.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_vm.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 381d4b4abac9..c646afef131f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2912,6 +2912,10 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
if (!dpagemap)
xe_svm_range_migrate_to_smem(vm, svm_range);
+ drm_dbg(&vm->xe->drm, "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
+ dpagemap ? dpagemap->drm->unique : "system",
+ xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
+
if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!dpagemap)) {
err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
if (err) {
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout
2025-10-25 12:04 ` [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout Thomas Hellström
@ 2025-10-27 23:16 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-27 23:16 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:11PM +0200, Thomas Hellström wrote:
> Add a debug printout that is valuable for pagemap prefetch.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_vm.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 381d4b4abac9..c646afef131f 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2912,6 +2912,10 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> if (!dpagemap)
> xe_svm_range_migrate_to_smem(vm, svm_range);
>
> + drm_dbg(&vm->xe->drm, "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
> + dpagemap ? dpagemap->drm->unique : "system",
> + xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
> +
> if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!dpagemap)) {
> err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
> if (err) {
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH 15/15] drm/xe: Retry migration once
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
` (13 preceding siblings ...)
2025-10-25 12:04 ` [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout Thomas Hellström
@ 2025-10-25 12:04 ` Thomas Hellström
2025-10-28 0:13 ` Matthew Brost
14 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-25 12:04 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, himal.prasad.ghimiray, apopple,
airlied, Simona Vetter, felix.kuehling, Matthew Brost,
Christian König, dakr, Mrozek, Michal, Joonas Lahtinen
Data present in foreign device memory may cause migration to fail.
For now, retry once after first migrating to system.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 9814f95cb212..41e075aa015c 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1529,13 +1529,24 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
struct drm_pagemap *dpagemap)
{
+ int err, retries = 1;
+
xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
- return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
- xe_svm_range_end(range),
- range->base.gpusvm->mm,
- ctx->timeslice_ms);
+retry:
+ err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
+ xe_svm_range_end(range),
+ range->base.gpusvm->mm,
+ ctx->timeslice_ms);
+ if ((err == -EBUSY || err == -EFAULT) && retries--) {
+ range_debug(range, "ALLOCATE VRAM - Retry.");
+
+ drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
+ goto retry;
+ }
+
+ return err;
}
static struct drm_pagemap_addr
--
2.51.0
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH 15/15] drm/xe: Retry migration once
2025-10-25 12:04 ` [PATCH 15/15] drm/xe: Retry migration once Thomas Hellström
@ 2025-10-28 0:13 ` Matthew Brost
2025-10-28 9:11 ` Thomas Hellström
0 siblings, 1 reply; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 0:13 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Sat, Oct 25, 2025 at 02:04:12PM +0200, Thomas Hellström wrote:
> Data present in foreign device memory may cause migration to fail.
> For now, retry once after first migrating to system.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 9814f95cb212..41e075aa015c 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -1529,13 +1529,24 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
> struct drm_pagemap *dpagemap)
> {
> + int err, retries = 1;
> +
> xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
> range_debug(range, "ALLOCATE VRAM");
>
> - return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
> - xe_svm_range_end(range),
> - range->base.gpusvm->mm,
> - ctx->timeslice_ms);
> +retry:
> + err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
> + xe_svm_range_end(range),
> + range->base.gpusvm->mm,
> + ctx->timeslice_ms);
> + if ((err == -EBUSY || err == -EFAULT) && retries--) {
I don't think this is what we want to do here. -EFAULT indicates that
the pages are entirely present somewhere in device memory. This could be
either on the local device or on a foreign device, but we don’t have
enough information here to determine which case it is.
If this is on our local device, we're always good. This could occur
playing mremap games.
If it's on a foreign device, things get trickier. If our interconnect
supports atomics (e.g., UAL), we're still good. But if the interconnect
doesn't support atomics (e.g., PCIe P2P) and this is an atomic fault, then we
need to move the memory. Also, if there's no path between device
memories, then of course we need to move the memory.
Again, we don’t have enough information here to make the correct
decision. We really need to call drm_gpusvm_range_get_pages to gather
the CPU pages in order to make this kind of decision. Ideally, the logic
should be built into drm_gpusvm_range_get_pages to understand atomic
migration requirements.
Once drm_gpusvm_range_get_pages returns, we can take appropriate action.
Initially, for simplicity, this might just be a bounce to system memory.
Later, it could evolve into a direct device-to-device move.
The logic inside drm_gpusvm_range_get_pages would likely involve
devmem_only combined with a drm_pagemap passed in, which can detect
connectivity and atomic support between devices—based on the drm_pagemap
extracted from the ZDD.
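Roughly along these lines, purely as a sketch; the helper names and the
capability checks below are made up, nothing like them exists yet:

/* Hypothetical: decide whether the pages we found force a migration. */
static bool pages_need_migrate(struct drm_pagemap *local,
			       struct drm_pagemap *found,
			       bool devmem_only, bool atomic_fault)
{
	if (!found)
		return devmem_only;	/* system memory, migrate only if devmem required */
	if (found == local)
		return false;		/* already in local device memory */
	if (atomic_fault)
		return !drm_pagemap_can_atomic(local, found);	/* assumed helper */
	return !drm_pagemap_can_access(local, found);		/* assumed helper */
}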
Let me know if this makes sense, or if you have thought about doing this in
a follow-up.
Matt
> + range_debug(range, "ALLOCATE VRAM - Retry.");
> +
> + drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
> + goto retry;
> + }
> +
> + return err;
> }
>
> static struct drm_pagemap_addr
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 15/15] drm/xe: Retry migration once
2025-10-28 0:13 ` Matthew Brost
@ 2025-10-28 9:11 ` Thomas Hellström
2025-10-28 19:03 ` Matthew Brost
0 siblings, 1 reply; 49+ messages in thread
From: Thomas Hellström @ 2025-10-28 9:11 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
Hi, Matt
On Mon, 2025-10-27 at 17:13 -0700, Matthew Brost wrote:
> On Sat, Oct 25, 2025 at 02:04:12PM +0200, Thomas Hellström wrote:
> > Data present in foreign device memory may cause migration to fail.
> > For now, retry once after first migrating to system.
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_svm.c | 19 +++++++++++++++----
> > 1 file changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > b/drivers/gpu/drm/xe/xe_svm.c
> > index 9814f95cb212..41e075aa015c 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -1529,13 +1529,24 @@ struct drm_pagemap
> > *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> > int xe_svm_alloc_vram(struct xe_svm_range *range, const struct
> > drm_gpusvm_ctx *ctx,
> > struct drm_pagemap *dpagemap)
> > {
> > + int err, retries = 1;
> > +
> > xe_assert(range_to_vm(&range->base)->xe, range-
> > >base.pages.flags.migrate_devmem);
> > range_debug(range, "ALLOCATE VRAM");
> >
> > - return drm_pagemap_populate_mm(dpagemap,
> > xe_svm_range_start(range),
> > - xe_svm_range_end(range),
> > - range->base.gpusvm->mm,
> > - ctx->timeslice_ms);
> > +retry:
> > + err = drm_pagemap_populate_mm(dpagemap,
> > xe_svm_range_start(range),
> > + xe_svm_range_end(range),
> > + range->base.gpusvm->mm,
> > + ctx->timeslice_ms);
> > + if ((err == -EBUSY || err == -EFAULT) && retries--) {
>
> I don't think this is what we want to do here. -EFAULT indicates that
> the pages are entirely present somewhere in device memory. This could
> be
> either on the local device or on a foreign device, but we don’t have
> enough information here to determine which case it is.
>
> If this is on our local device, we're always good. This could occur
> playing mremap games.
>
> If it's on a foreign device, things get trickier. If our interconnect
> supports atomics we're still good. But if the interconnect
> doesn't support atomics (e.g., PCIe P2P) and this is an atomic fault, then
> we
> need to move the memory. Also, if there's no path between device
> memories, then of course we need to move the memory.
>
> Again, we don’t have enough information here to make the correct
> decision. We really need to call drm_gpusvm_range_get_pages to gather
> the CPU pages in order to make this kind of decision. Ideally, the
> logic
> should be built into drm_gpusvm_range_get_pages to understand atomic
> migration requirements.
For multi-device I'm just looking at a patch that considers p2p
migration and at the same time returns 0 if data is placed in
compatible memory, given migration policies and interconnects. But
until that patch lands, we need a way to evict memory from foreign
devices so that we can migrate to the desired device.
I would have expected that if memory is already present in local device
memory we'd have that xe_svm_range_in_vram() flag set and would not
attempt to migrate, at least in most cases? Currently, if data is
already fully or partly present in another p2p-device memory, and we
ignore the -EFAULT, then get_pages() wouldn't detect that? Or well, we
can look at the dpagemap returned from get_pages and retry the
migration at that point.
We also need to realize that with multi-gpu, the chances of migration
races increase dramatically and whether those return -EBUSY or -EFAULT
appears a bit arbitrary to me? We can't really assume that cpages == 0
means all pages are already where they are supposed to be.
My current thinking on how to handle all this is the following:
1) xe_svm_range_in_vram(), first check to avoid migration.
2) (NEW, not implemented yet) if we decide to migrate, first run a
hmm_range_fault() without faulting flag to determine current memory
migration status - Perhaps optional. This may reject migration more
efficiently than if we collect pages for migration and then inspect
them, because then we've already sent an invalidation event.
3) Call into drm_pagemap_populate_mm(). This collects all compatible-
and system pages, and determines what to migrate. If no migration
needed, returns 0. If racing or needing to migrate foreign devices to
system, return -EBUSY,
4) If -EBUSY evict, and retry migration once.
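As a rough sketch of the combined flow (xe_svm_probe_residency() is a
placeholder for the not-yet-implemented check in 2), and start / end / mm /
timeslice_ms are shorthand for the usual arguments):

	/* 1) Skip migration if the range is already resident locally. */
	if (xe_svm_range_in_vram(range))
		goto get_pages;

	/* 2) Optional non-faulting probe of current placement (placeholder). */
	if (xe_svm_probe_residency(range, dpagemap))
		goto get_pages;

	/* 3) Migrate; populate_mm() would return 0 if nothing needs moving. */
	err = drm_pagemap_populate_mm(dpagemap, start, end, mm, timeslice_ms);

	/* 4) On -EBUSY (racing or foreign placement), evict and retry once. */
	if (err == -EBUSY && retries--) {
		drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
		err = drm_pagemap_populate_mm(dpagemap, start, end, mm, timeslice_ms);
	}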
For now, I think we make atomic faults use local VRAM only. Moving fow
>
> Once drm_gpusvm_range_get_pages returns, we can take appropriate
> action.
> Initially, for simplicity, this might just be a bounce to system
> memory.
> Later, it could evolve into a direct device-to-device move.
I agree we need a pass with hmm_range_fault(), question is in what
order we do this. I think a pass without the fault flag on before
trying to migrate would
a) Avoid populating with system pages for data that's going to be in
VRAM anyway,
b) Possibly avoiding collecting migrate pages and thus also an
invalidation for all devices.
The drawback is we might unnecessarily run a non-faulting
hmm_range_fault() when we need to migrate anyway. My thinking is, that
would be a rather quick call, though compared to the reverse lookups in
the migrate code.
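For reference, the non-faulting pass I have in mind would be little more
than the following (notifier, pfns and owner are assumed to be set up as in
the ordinary get_pages path):

	struct hmm_range hrange = {
		.notifier = notifier,
		.notifier_seq = mmu_interval_read_begin(notifier),
		.start = xe_svm_range_start(range),
		.end = xe_svm_range_end(range),
		.hmm_pfns = pfns,
		.default_flags = 0,	/* snapshot only, no faulting */
		.dev_private_owner = owner,
	};
	int err;

	mmap_read_lock(mm);
	err = hmm_range_fault(&hrange);
	mmap_read_unlock(mm);
	/* On success, inspect hrange.hmm_pfns to see what is already resident. */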
>
> The logic inside drm_gpusvm_range_get_pages would likely involve
> devmem_only combined with a drm_pagemap passed in, which can detect
> connectivity and atomic support between devices—based on the
> drm_pagemap
> extracted from the ZDD.
>
> Let me know if this makes sense, or if you have thought about doing this
> in
> a follow up.
In any case I think we need to set up a flow-chart / flow-list similar
to the above and consider the most important cases and what to do with
them. For now, I think we can replace this patch if necessary with a
dpagemap check, whether desired equals what's present and rerun after
that. We'd probably need that anyway.
Thomas
>
> Matt
>
> > + range_debug(range, "ALLOCATE VRAM - Retry.");
> > +
> > + drm_gpusvm_range_evict(range->base.gpusvm, &range-
> > >base);
> > + goto retry;
> > + }
> > +
> > + return err;
> > }
> >
> > static struct drm_pagemap_addr
> > --
> > 2.51.0
> >
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [PATCH 15/15] drm/xe: Retry migration once
2025-10-28 9:11 ` Thomas Hellström
@ 2025-10-28 19:03 ` Matthew Brost
0 siblings, 0 replies; 49+ messages in thread
From: Matthew Brost @ 2025-10-28 19:03 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, dri-devel, himal.prasad.ghimiray, apopple, airlied,
Simona Vetter, felix.kuehling, Christian König, dakr,
Mrozek, Michal, Joonas Lahtinen
On Tue, Oct 28, 2025 at 10:11:18AM +0100, Thomas Hellström wrote:
> Hi, Matt
>
> On Mon, 2025-10-27 at 17:13 -0700, Matthew Brost wrote:
> > On Sat, Oct 25, 2025 at 02:04:12PM +0200, Thomas Hellström wrote:
> > > Data present in foreign device memory may cause migration to fail.
> > > For now, retry once after first migrating to system.
> > >
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_svm.c | 19 +++++++++++++++----
> > > 1 file changed, 15 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_svm.c
> > > b/drivers/gpu/drm/xe/xe_svm.c
> > > index 9814f95cb212..41e075aa015c 100644
> > > --- a/drivers/gpu/drm/xe/xe_svm.c
> > > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > > @@ -1529,13 +1529,24 @@ struct drm_pagemap
> > > *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
> > > int xe_svm_alloc_vram(struct xe_svm_range *range, const struct
> > > drm_gpusvm_ctx *ctx,
> > > struct drm_pagemap *dpagemap)
> > > {
> > > + int err, retries = 1;
> > > +
> > > xe_assert(range_to_vm(&range->base)->xe, range-
> > > >base.pages.flags.migrate_devmem);
> > > range_debug(range, "ALLOCATE VRAM");
> > >
> > > - return drm_pagemap_populate_mm(dpagemap,
> > > xe_svm_range_start(range),
> > > - xe_svm_range_end(range),
> > > - range->base.gpusvm->mm,
> > > - ctx->timeslice_ms);
> > > +retry:
> > > + err = drm_pagemap_populate_mm(dpagemap,
> > > xe_svm_range_start(range),
> > > + xe_svm_range_end(range),
> > > + range->base.gpusvm->mm,
> > > + ctx->timeslice_ms);
> > > + if ((err == -EBUSY || err == -EFAULT) && retries--) {
> >
> > I don't think this is what we want to do here. -EFAULT indicates that
> > the pages are entirely present somewhere in device memory. This could
> > be
> > either on the local device or on a foreign device, but we don’t have
> > enough information here to determine which case it is.
> >
> > If this is on our local device, we're always good. This could occur
> > playing mremap games.
> >
> > If it's on a foreign device, things get trickier. If our interconnect
> > supports atomics we're still good. But if the interconnect
> > doesn't support atomics (e.g., PCIe P2P) and this is an atomic fault, then
> > we
> > need to move the memory. Also, if there's no path between device
> > memories, then of course we need to move the memory.
> >
> > Again, we don’t have enough information here to make the correct
> > decision. We really need to call drm_gpusvm_range_get_pages to gather
> > the CPU pages in order to make this kind of decision. Ideally, the
> > logic
> > should be built into drm_gpusvm_range_get_pages to understand atomic
> > migration requirements.
>
> For multi-device I'm just looking at a patch that considers p2p
> migration and at the same time returns 0 if data is placed in
> compatible memory, given migration policies and interconnects. But
> until that patch lands, we need a way to evict memory from foreign
> devices so that we can migrate to the desired device.
>
> I would have expected that if memory is already present in local device
> memory we'd have that xe_svm_range_in_vram() flag set and would not
> attempt to migrate, at least in most cases? Currently, if data is
We check whether a range has valid GPU mappings and skip the fault if
that’s the case. However, if a user performs an mremap while a GPU
mapping is present, the SVM range becomes new, but the CPU pages remain
valid. In this flow, the VRAM allocation fails, and get_pages correctly
locates the pages.
This situation is similar to a multi-device scenario, where another GPU
fault handler has already moved the CPU pages to the correct location.
We could add an additional step before attempting to allocate VRAM that
detects this condition—for example, by calling get_pages with specific
arguments. For what it's worth, get_pages is a very lightweight
function; if I recall correctly, its overhead is less than 0.05
microseconds.
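Something along the lines of the below, before attempting xe_svm_alloc_vram();
the exact ctx flags for a lookup-only call are hand-waved here:

	struct drm_gpusvm_ctx probe_ctx = *ctx;	/* hand-waved: no devmem_only, no migration */
	int err;

	err = drm_gpusvm_range_get_pages(range->base.gpusvm, &range->base,
					 &probe_ctx);
	if (!err && xe_svm_range_in_vram(range))
		return 0;	/* e.g. the mremap case: pages already in place */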
> already fully or partly present in another p2p-device memory, and we
> ignore the -EFAULT, then get_pages() wouldn't detect that? Or well, we
> can look at the dpagemap returned from get_pages and retry the
> migration at that point.
>
> We also need to realize that with multi-gpu, the chances of migration
> races increase dramatically and whether those return -EBUSY or -EFAULT
> appears a bit arbitrary to me? We can't really assume that cpages == 0
> means all pages are already where they are supposed to be.
>
> My current thinking on how to handle all this is the following:
>
> 1) xe_svm_range_in_vram(), first check to avoid migration.
> 2) (NEW, not implemented yet) if we decide to migrate, first run a
> hmm_range_fault() without faulting flag to determine current memory
> migration status - Perhaps optional. This may reject migration more
> efficiently than if we collect pages for migration and then inspect
> them, because then we've already sent an invalidation event.
Yes, I think parts 1 and 2 are probably variations of the get_pages
function mentioned above.
> 3) Call into drm_pagemap_populate_mm(). This collects all compatible-
> and system pages, and determines what to migrate. If no migration
> needed, returns 0. If racing or needing to migrate foreign devices to
> system, return -EBUSY,
> 4) If -EBUSY evict, and retry migration once.
>
> For now, I think we make atomic faults use local VRAM only. Moving fow
>
Maybe that makes sense, but drm_gpusvm_pages also includes drm_pagemap,
so adding logic to determine whether atomics are allowed shouldn't be
difficult either. We need to forward this information to the xe_pt.c
layer to correctly set the atomic enable bit.
> >
> > Once drm_gpusvm_range_get_pages returns, we can take appropriate
> > action.
> > Initially, for simplicity, this might just be a bounce to system
> > memory.
> > Later, it could evolve into a direct device-to-device move.
>
> I agree we need a pass with hmm_range_fault(), question is in what
> order we do this. I think a pass without the fault flag on before
> trying to migrate would
>
> a) Avoid populating with system pages for data that's going to be in
> VRAM anyway,
Agreed. Whether CPU pages are present or not can dramatically affect
migration time, so it's best to avoid faulting pages that can be moved
immediately.
> b) Possibly avoiding collecting migrate pages and thus also an
> invalidation for all devices.
>
Yes, this is a higher-overhead call than hmm_range_fault.
> The drawback is we might unnecessarily run a non-faulting
> hmm_range_fault() when we need to migrate anyway. My thinking is, that
> would be a rather quick call, though compared to the reverse lookups in
> the migrate code.
>
Like I said, hmm_range_fault is quite fast—especially once we get 2MB
device pages—so I'm not too concerned about the overhead.
> >
> > The logic inside drm_gpusvm_range_get_pages would likely involve
> > devmem_only combined with a drm_pagemap passed in, which can detect
> > connectivity and atomic support between devices—based on the
> > drm_pagemap
> > extracted from the ZDD.
> >
> > Let me know if this makes sense, or if you have thought about doing this
> > in
> > a follow up.
>
> In any case I think we need to set up a flow-chart / flow-list similar
> to the above and consider the most important cases and what to do with
> them. For now, I think we can replace this patch if necessary with a
> dpagemap check, whether desired equals what's present and rerun after
> that. We'd probably need that anyway.
>
Yes, some flowcharts for our GPU fault handler would be helpful. It's
already quite complex, and multi-GPU support adds even more complexity.
We have simple examples in the GPU SVM documentation, but perhaps we
could put together a more detailed kernel doc in xe_svm.c that outlines
various scenarios and what we expect to happen in each.
Matt
> Thomas
>
>
>
>
> >
> > Matt
> >
> > > + range_debug(range, "ALLOCATE VRAM - Retry.");
> > > +
> > > + drm_gpusvm_range_evict(range->base.gpusvm, &range-
> > > >base);
> > > + goto retry;
> > > + }
> > > +
> > > + return err;
> > > }
> > >
> > > static struct drm_pagemap_addr
> > > --
> > > 2.51.0
> > >
>
^ permalink raw reply [flat|nested] 49+ messages in thread