From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
dri-devel@lists.freedesktop.org, himal.prasad.ghimiray@intel.com,
apopple@nvidia.com, airlied@gmail.com,
"Simona Vetter" <simona.vetter@ffwll.ch>,
felix.kuehling@amd.com, "Matthew Brost" <matthew.brost@intel.com>,
"Christian König" <christian.koenig@amd.com>,
dakr@kernel.org, "Mrozek, Michal" <michal.mrozek@intel.com>,
"Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>
Subject: [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes
Date: Sat, 25 Oct 2025 14:04:00 +0200 [thread overview]
Message-ID: <20251025120412.12262-4-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20251025120412.12262-1-thomas.hellstrom@linux.intel.com>
Even if the drm_pagemap provider has released its reference on
the drm_pagemap, references may be held by still active pages.
Ensure that we hold a reference on the provider drm device and
modules for as long as we might need to use the drm_pagemap ops.
Note that in theory, the drm_gpusvm_helper module may be unloaded
as soon as the final module_put() of the provider driver module is
executed, so we need to add a module_exit() function that waits
for the work item executing the module_put() has completed.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/drm_pagemap.c | 101 ++++++++++++++++++++++++++++++++--
drivers/gpu/drm/xe/xe_svm.c | 15 ++++-
include/drm/drm_pagemap.h | 10 +++-
3 files changed, 117 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 173b3ecb07d5..fb18a80d6a1c 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
+#include <drm/drm_print.h>
/**
* DOC: Overview
@@ -544,16 +545,92 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
return -ENOMEM;
}
+static void drm_pagemap_dev_unhold_work(struct work_struct *work);
+static LLIST_HEAD(drm_pagemap_unhold_list);
+static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
+
+/**
+ * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
+ * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
+ * @drm: drm device to put.
+ *
+ * When a struct drm_pagemap is released, we also need to release the
+ * reference it holds on the drm device. However, typically that needs
+ * to be done separately from a system-wide workqueue.
+ * Each time a struct drm_pagemap is initialized
+ * (or re-initialized if cached) therefore allocate a separate
+ * drm_pagemap_dev_hold item, from which we put the drm device and
+ * associated module.
+ */
+struct drm_pagemap_dev_hold {
+ struct llist_node link;
+ struct drm_device *drm;
+};
+
static void drm_pagemap_release(struct kref *ref)
{
struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
-
+ struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
+
+ /*
+ * We know the pagemap provider is alive at this point, since
+ * the struct drm_pagemap_dev_hold holds a reference to the
+ * pagemap provider drm_device and its module.
+ */
+ dpagemap->dev_hold = NULL;
kfree(dpagemap);
+ llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
+ schedule_work(&drm_pagemap_work);
+ /*
+ * Here, either the provider device is still alive, since if called from
+ * page_free(), the caller is holding a reference on the dev_pagemap,
+ * or if called from drm_pagemap_put(), the direct caller is still alive.
+ * This ensures we can't race with THIS module unload.
+ */
+}
+
+static void drm_pagemap_dev_unhold_work(struct work_struct *work)
+{
+ struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
+ struct drm_pagemap_dev_hold *dev_hold, *next;
+
+ /*
+ * Deferred release of drm_pagemap provider device and module.
+ * THIS module is kept alive during the release by the
+ * flush_work() in the drm_pagemap_exit() function.
+ */
+ llist_for_each_entry_safe(dev_hold, next, node, link) {
+ struct drm_device *drm = dev_hold->drm;
+ struct module *module = drm->driver->fops->owner;
+
+ drm_dbg(drm, "Releasing reference on provider device and module.\n");
+ drm_dev_put(drm);
+ module_put(module);
+ kfree(dev_hold);
+ }
+}
+
+static struct drm_pagemap_dev_hold *
+drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_dev_hold *dev_hold;
+ struct drm_device *drm = dpagemap->drm;
+
+ dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
+ if (!dev_hold)
+ return ERR_PTR(-ENOMEM);
+
+ init_llist_node(&dev_hold->link);
+ dev_hold->drm = drm;
+ (void)try_module_get(drm->driver->fops->owner);
+ drm_dev_get(drm);
+
+ return dev_hold;
}
/**
* drm_pagemap_create() - Create a struct drm_pagemap.
- * @dev: Pointer to a struct device providing the device-private memory.
+ * @drm: Pointer to a struct drm_device providing the device-private memory.
* @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
* @ops: Pointer to the struct drm_pagemap_ops.
*
@@ -563,20 +640,28 @@ static void drm_pagemap_release(struct kref *ref)
* Error pointer on error.
*/
struct drm_pagemap *
-drm_pagemap_create(struct device *dev,
+drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops)
{
struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
+ struct drm_pagemap_dev_hold *dev_hold;
if (!dpagemap)
return ERR_PTR(-ENOMEM);
kref_init(&dpagemap->ref);
- dpagemap->dev = dev;
+ dpagemap->drm = drm;
dpagemap->ops = ops;
dpagemap->pagemap = pagemap;
+ dev_hold = drm_pagemap_dev_hold(dpagemap);
+ if (IS_ERR(dev_hold)) {
+ kfree(dpagemap);
+ return ERR_CAST(dev_hold);
+ }
+ dpagemap->dev_hold = dev_hold;
+
return dpagemap;
}
EXPORT_SYMBOL(drm_pagemap_create);
@@ -937,3 +1022,11 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);
+
+static void drm_pagemap_exit(void)
+{
+ flush_work(&drm_pagemap_work);
+ if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
+ disable_work_sync(&drm_pagemap_work);
+}
+module_exit(drm_pagemap_exit);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6d2c6c144315..f6ee22da2e95 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1437,7 +1437,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
unsigned int order,
enum dma_data_direction dir)
{
- struct device *pgmap_dev = dpagemap->dev;
+ struct device *pgmap_dev = dpagemap->drm->dev;
enum drm_interconnect_protocol prot;
dma_addr_t addr;
@@ -1457,6 +1457,14 @@ static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.populate_mm = xe_drm_pagemap_populate_mm,
};
+static void xe_devm_release(void *data)
+{
+ struct xe_vram_region *vr = data;
+
+ drm_pagemap_put(vr->dpagemap);
+ vr->dpagemap = NULL;
+}
+
/**
* xe_devm_add: Remap and provide memmap backing for device memory
* @tile: tile that the memory region belongs to
@@ -1482,7 +1490,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return ret;
}
- vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
+ vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
&xe_drm_pagemap_ops);
if (IS_ERR(vr->dpagemap)) {
drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
@@ -1490,6 +1498,9 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
ret = PTR_ERR(vr->dpagemap);
goto out_no_dpagemap;
}
+ ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
+ if (ret)
+ goto out_no_dpagemap;
vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
vr->pagemap.range.start = res->start;
diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h
index 2c7de928865b..5cfe54331ba7 100644
--- a/include/drm/drm_pagemap.h
+++ b/include/drm/drm_pagemap.h
@@ -9,6 +9,7 @@
#define NR_PAGES(order) (1U << (order))
struct drm_pagemap;
+struct drm_pagemap_dev_hold;
struct drm_pagemap_zdd;
struct device;
@@ -130,14 +131,17 @@ struct drm_pagemap_ops {
* used for device p2p handshaking.
* @ops: The struct drm_pagemap_ops.
* @ref: Reference count.
- * @dev: The struct drevice owning the device-private memory.
+ * @drm: The struct drm device owning the device-private memory.
* @pagemap: Pointer to the underlying dev_pagemap.
+ * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
+ * device referencing.
*/
struct drm_pagemap {
const struct drm_pagemap_ops *ops;
struct kref ref;
- struct device *dev;
+ struct drm_device *drm;
struct dev_pagemap *pagemap;
+ struct drm_pagemap_dev_hold *dev_hold;
};
struct drm_pagemap_devmem;
@@ -206,7 +210,7 @@ struct drm_pagemap_devmem_ops {
unsigned long npages);
};
-struct drm_pagemap *drm_pagemap_create(struct device *dev,
+struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
struct dev_pagemap *pagemap,
const struct drm_pagemap_ops *ops);
--
2.51.0
next prev parent reply other threads:[~2025-10-25 12:04 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-25 12:03 [PATCH 00/15] Dynamic drm_pagemaps and Initial multi-device SVM Thomas Hellström
2025-10-25 12:03 ` [PATCH 01/15] drm/pagemap, drm/xe: Add refcounting to struct drm_pagemap Thomas Hellström
2025-10-29 0:31 ` Matthew Brost
2025-10-29 1:11 ` Matthew Brost
2025-10-29 14:51 ` Thomas Hellström
2025-10-25 12:03 ` [PATCH 02/15] drm/pagemap: Add a refcounted drm_pagemap backpointer to struct drm_pagemap_zdd Thomas Hellström
2025-10-29 0:33 ` Matthew Brost
2025-10-25 12:04 ` Thomas Hellström [this message]
2025-10-29 0:46 ` [PATCH 03/15] drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes Matthew Brost
2025-10-29 14:49 ` Thomas Hellström
2025-10-30 2:46 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 04/15] drm/pagemap: Add a drm_pagemap cache and shrinker Thomas Hellström
2025-10-28 1:23 ` Matthew Brost
2025-10-28 9:46 ` Thomas Hellström
2025-10-28 10:29 ` Thomas Hellström
2025-10-28 18:38 ` Matthew Brost
2025-10-29 22:41 ` Matthew Brost
2025-10-29 22:48 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 05/15] drm/xe: Use the " Thomas Hellström
2025-10-30 0:43 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 06/15] drm/pagemap: Remove the drm_pagemap_create() interface Thomas Hellström
2025-10-29 1:00 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 07/15] drm/pagemap_util: Add a utility to assign an owner to a set of interconnected gpus Thomas Hellström
2025-10-29 1:21 ` Matthew Brost
2025-10-29 14:52 ` Thomas Hellström
2025-10-25 12:04 ` [PATCH 08/15] drm/xe: Use the drm_pagemap_util helper to get a svm pagemap owner Thomas Hellström
2025-10-27 23:02 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 09/15] drm/xe: Pass a drm_pagemap pointer around with the memory advise attributes Thomas Hellström
2025-10-28 0:35 ` Matthew Brost
2025-11-26 0:31 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 10/15] drm/xe: Use the vma attibute drm_pagemap to select where to migrate Thomas Hellström
2025-10-25 18:01 ` kernel test robot
2025-10-29 3:27 ` Matthew Brost
2025-10-29 14:56 ` Thomas Hellström
2025-10-29 16:59 ` kernel test robot
2025-10-25 12:04 ` [PATCH 11/15] drm/xe: Simplify madvise_preferred_mem_loc() Thomas Hellström
2025-10-27 23:14 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 12/15] drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm Thomas Hellström
2025-10-28 0:51 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 13/15] drm/xe: Support pcie p2p dma as a fast interconnect Thomas Hellström
2025-10-28 1:14 ` Matthew Brost
2025-10-28 9:32 ` Thomas Hellström
2025-10-29 2:17 ` Matthew Brost
2025-10-29 14:54 ` Thomas Hellström
2025-10-25 12:04 ` [PATCH 14/15] drm/xe/vm: Add a prefetch debug printout Thomas Hellström
2025-10-27 23:16 ` Matthew Brost
2025-10-25 12:04 ` [PATCH 15/15] drm/xe: Retry migration once Thomas Hellström
2025-10-28 0:13 ` Matthew Brost
2025-10-28 9:11 ` Thomas Hellström
2025-10-28 19:03 ` Matthew Brost
2025-10-25 12:16 ` ✗ CI.checkpatch: warning for Dynamic drm_pagemaps and Initial multi-device SVM Patchwork
2025-10-25 12:17 ` ✓ CI.KUnit: success " Patchwork
2025-10-25 13:06 ` ✓ Xe.CI.BAT: " Patchwork
2025-10-25 14:14 ` ✗ Xe.CI.Full: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251025120412.12262-4-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=apopple@nvidia.com \
--cc=christian.koenig@amd.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=felix.kuehling@amd.com \
--cc=himal.prasad.ghimiray@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=joonas.lahtinen@linux.intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.