From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Matthew Brost" <matthew.brost@intel.com>,
dri-devel@lists.freedesktop.org, himal.prasad.ghimiray@intel.com,
apopple@nvidia.com, airlied@gmail.com,
"Simona Vetter" <simona.vetter@ffwll.ch>,
felix.kuehling@amd.com,
"Christian König" <christian.koenig@amd.com>,
dakr@kernel.org, "Mrozek, Michal" <michal.mrozek@intel.com>,
"Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>
Subject: [PATCH v4 05/22] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
Date: Thu, 11 Dec 2025 17:58:52 +0100 [thread overview]
Message-ID: <20251211165909.219710-6-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20251211165909.219710-1-thomas.hellstrom@linux.intel.com>
If a device holds a reference on a foreign device's drm_pagemap,
and a device unbind is executed on the foreign device, then
typically that foreign device would evict its device-private
pages and then continue its device-managed cleanup, eventually
releasing its drm device and possibly allowing for module unload.
However, since we're still holding a reference on a drm_pagemap,
when that reference is released and the provider module is
unloaded we'd execute out of undefined memory.
Therefore keep a reference on the provider device and module until
the last drm_pagemap reference is gone.
Note that in theory, the drm_gpusvm_helper module may be unloaded
as soon as the final module_put() of the provider driver module is
executed, so we need to add a module_exit() function that waits
until the work item executing the module_put() has completed.
v2:
- Better commit message (Matt Brost)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 101 ++++++++++++++++++++++++++++++++--
drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
include/drm/drm_pagemap.h | 10 +++-
3 files changed, 117 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 21b9d3225160..ebf160f594ed 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
+#include <drm/drm_print.h>
/**
* DOC: Overview
@@ -545,16 +546,92 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
return -ENOMEM;
}
+static void drm_pagemap_dev_unhold_work(struct work_struct *work);
+static LLIST_HEAD(drm_pagemap_unhold_list);
+static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
+
+/**
+ * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
+ * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
+ * @drm: drm device to put.
+ *
+ * When a struct drm_pagemap is released, we also need to release the
+ * reference it holds on the drm device. However, typically that needs
+ * to be done separately from a system-wide workqueue.
+ * Each time a struct drm_pagemap is initialized
+ * (or re-initialized if cached) therefore allocate a separate
+ * drm_pagemap_dev_hold item, from which we put the drm device and
+ * associated module.
+ */
+struct drm_pagemap_dev_hold {
+ struct llist_node link;
+ struct drm_device *drm;
+};
+
static void drm_pagemap_release(struct kref *ref)
{
struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
-
+ struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
+
+ /*
+ * We know the pagemap provider is alive at this point, since
+ * the struct drm_pagemap_dev_hold holds a reference to the
+ * pagemap provider drm_device and its module.
+ */
+ dpagemap->dev_hold = NULL;
kfree(dpagemap);
+ llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
+ schedule_work(&drm_pagemap_work);
+ /*
+ * Here, either the provider device is still alive, since if called from
+ * page_free(), the caller is holding a reference on the dev_pagemap,
+ * or if called from drm_pagemap_put(), the direct caller is still alive.
+ * This ensures we can't race with THIS module unload.
+ */
+}
+
+static void drm_pagemap_dev_unhold_work(struct work_struct *work)
+{
+ struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
+ struct drm_pagemap_dev_hold *dev_hold, *next;
+
+ /*
+ * Deferred release of drm_pagemap provider device and module.
+ * THIS module is kept alive during the release by the
+ * flush_work() in the drm_pagemap_exit() function.
+ */
+ llist_for_each_entry_safe(dev_hold, next, node, link) {
+ struct drm_device *drm = dev_hold->drm;
+ struct module *module = drm->driver->fops->owner;
+
+ drm_dbg(drm, "Releasing reference on provider device and module.\n");
+ drm_dev_put(drm);
+ module_put(module);
+ kfree(dev_hold);
+ }
+}
+
+static struct drm_pagemap_dev_hold *
+drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_dev_hold *dev_hold;
+ struct drm_device *drm = dpagemap->drm;
+
+ dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
+ if (!dev_hold)
+ return ERR_PTR(-ENOMEM);
+
+ init_llist_node(&dev_hold->link);
+ dev_hold->drm = drm;
+ (void)try_module_get(drm->driver->fops->owner);
+ drm_dev_get(drm);
+
+ return dev_hold;
}
/**
* drm_pagemap_create() - Create a struct drm_pagemap.
- * @dev: Pointer to a struct device providing the device-private memory.
+ * @drm: Pointer to a struct drm_device providing the device-private memory.
* @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
* @ops: Pointer to the struct drm_pagemap_ops.
*
@@ -564,20 +641,28 @@ static void drm_pagemap_release(struct kref *ref)
* Error pointer on error.
*/
struct drm_pagemap *
-drm_pagemap_create(struct device *dev,
+drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops)
{
struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
+ struct drm_pagemap_dev_hold *dev_hold;
if (!dpagemap)
return ERR_PTR(-ENOMEM);
kref_init(&dpagemap->ref);
- dpagemap->dev = dev;
+ dpagemap->drm = drm;
dpagemap->ops = ops;
dpagemap->pagemap = pagemap;
+ dev_hold = drm_pagemap_dev_hold(dpagemap);
+ if (IS_ERR(dev_hold)) {
+ kfree(dpagemap);
+ return ERR_CAST(dev_hold);
+ }
+ dpagemap->dev_hold = dev_hold;
+
return dpagemap;
}
EXPORT_SYMBOL(drm_pagemap_create);
@@ -942,3 +1027,11 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);
+
+static void drm_pagemap_exit(void)
+{
+ flush_work(&drm_pagemap_work);
+ if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
+ disable_work_sync(&drm_pagemap_work);
+}
+module_exit(drm_pagemap_exit);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index e4036f428dc0..cffcf96943b7 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1492,7 +1492,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
unsigned int order,
enum dma_data_direction dir)
{
- struct device *pgmap_dev = dpagemap->dev;
+ struct device *pgmap_dev = dpagemap->drm->dev;
enum drm_interconnect_protocol prot;
dma_addr_t addr;
@@ -1512,6 +1512,14 @@ static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.populate_mm = xe_drm_pagemap_populate_mm,
};
+static void xe_devm_release(void *data)
+{
+ struct xe_vram_region *vr = data;
+
+ drm_pagemap_put(vr->dpagemap);
+ vr->dpagemap = NULL;
+}
+
/**
* xe_devm_add: Remap and provide memmap backing for device memory
* @tile: tile that the memory region belongs to
@@ -1537,7 +1545,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return ret;
}
- vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
+ vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
&xe_drm_pagemap_ops);
if (IS_ERR(vr->dpagemap)) {
drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
@@ -1545,6 +1553,9 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
ret = PTR_ERR(vr->dpagemap);
goto out_no_dpagemap;
}
+ ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
+ if (ret)
+ goto out_no_dpagemap;
vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
vr->pagemap.range.start = res->start;
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index 093e7199c44b..b3edcdde4454 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -10,6 +10,7 @@
struct dma_fence;
struct drm_pagemap;
+struct drm_pagemap_dev_hold;
struct drm_pagemap_zdd;
struct device;
@@ -131,14 +132,17 @@ struct drm_pagemap_ops {
* used for device p2p handshaking.
* @ops: The struct drm_pagemap_ops.
* @ref: Reference count.
- * @dev: The struct drevice owning the device-private memory.
+ * @drm: The struct drm device owning the device-private memory.
* @pagemap: Pointer to the underlying dev_pagemap.
+ * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
+ * device referencing.
*/
struct drm_pagemap {
const struct drm_pagemap_ops *ops;
struct kref ref;
- struct device *dev;
+ struct drm_device *drm;
struct dev_pagemap *pagemap;
+ struct drm_pagemap_dev_hold *dev_hold;
};
struct drm_pagemap_devmem;
@@ -213,7 +217,7 @@ struct drm_pagemap_devmem_ops {
struct dma_fence *pre_migrate_fence);
};
-struct drm_pagemap *drm_pagemap_create(struct device *dev,
+struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops);
--
2.51.1
next prev parent reply other threads:[~2025-12-11 16:59 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-11 16:58 [PATCH v4 00/22] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 01/22] drm/xe/svm: Fix a debug printout Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 02/22] drm/pagemap, drm/xe: Ensure that the devmem allocation is idle before use Thomas Hellström
2025-12-12 8:56 ` Thomas Hellström
2025-12-12 9:24 ` Ghimiray, Himal Prasad
2025-12-12 10:15 ` Thomas Hellström
2025-12-12 10:17 ` Ghimiray, Himal Prasad
2025-12-11 16:58 ` [PATCH v4 03/22] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
2025-12-12 11:24 ` Ghimiray, Himal Prasad
2025-12-11 16:58 ` [PATCH v4 04/22] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd Thomas Hellström
2025-12-11 16:58 ` Thomas Hellström [this message]
2025-12-11 16:58 ` [PATCH v4 06/22] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 07/22] drm/xe: Use the " Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 08/22] drm/pagemap: Remove the drm_pagemap_create() interface Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 09/22] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 10/22] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 11/22] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes Thomas Hellström
2025-12-11 16:58 ` [PATCH v4 12/22] drm/xe: Use the vma attibute drm_pagemap to select where to migrate Thomas Hellström
2025-12-12 9:51 ` Ghimiray, Himal Prasad
2025-12-11 16:59 ` [PATCH v4 13/22] drm/xe: Simplify madvise_preferred_mem_loc() Thomas Hellström
2025-12-11 16:59 ` [PATCH v4 14/22] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm Thomas Hellström
2025-12-11 16:59 ` [PATCH v4 15/22] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
2025-12-11 16:59 ` [PATCH v4 16/22] drm/xe/vm: Add a couple of VM debug printouts Thomas Hellström
2025-12-11 16:59 ` [PATCH v4 17/22] drm/xe/svm: Document how xe keeps drm_pagemap references Thomas Hellström
2025-12-11 16:59 ` [PATCH v4 18/22] drm/pagemap, drm/xe: Clean up the use of the device-private page owner Thomas Hellström
2025-12-12 10:09 ` Ghimiray, Himal Prasad
2025-12-11 16:59 ` [PATCH v4 19/22] drm/gpusvm: Introduce a function to scan the current migration state Thomas Hellström
2025-12-12 11:21 ` Ghimiray, Himal Prasad
2025-12-12 11:35 ` Thomas Hellström
2025-12-16 0:58 ` Matthew Brost
2025-12-16 23:55 ` Matthew Brost
2025-12-17 6:57 ` Ghimiray, Himal Prasad
2025-12-11 16:59 ` [PATCH v4 20/22] drm/xe: Use drm_gpusvm_scan_mm() Thomas Hellström
2025-12-16 1:06 ` Matthew Brost
2025-12-17 6:58 ` Ghimiray, Himal Prasad
2025-12-11 16:59 ` [PATCH v4 21/22] drm/pagemap, drm/xe: Support destination migration over interconnect Thomas Hellström
2025-12-18 1:20 ` Matthew Brost
2025-12-11 16:59 ` [PATCH v4 22/22] drm/pagemap: Support source " Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251211165909.219710-6-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=apopple@nvidia.com \
--cc=christian.koenig@amd.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=felix.kuehling@amd.com \
--cc=himal.prasad.ghimiray@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=joonas.lahtinen@linux.intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).