* [PATCH v3 1/3] drm/xe: Attempt to bring bos back to VRAM after eviction
[not found] <20250904130614.3212-1-thomas.hellstrom@linux.intel.com>
@ 2025-09-04 13:06 ` Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 2/3] drm/xe: Allow the pm notifier to continue on failure Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate Thomas Hellström
2 siblings, 0 replies; 5+ messages in thread
From: Thomas Hellström @ 2025-09-04 13:06 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Matthew Brost, Matthew Auld, stable,
Rodrigo vivi, Maarten Lankhorst
VRAM+TT bos that are evicted from VRAM to TT may remain in
TT also after a revalidation following eviction or suspend.
This manifests itself as applications becoming sluggish
after buffer objects get evicted or after a resume from
suspend or hibernation.
If the bo supports placement in both VRAM and TT, and
we are on DGFX, mark the TT placement as fallback. This means
that it is tried only after VRAM + eviction.
This flaw has probably been present since the xe module was
upstreamed but use a Fixes: commit below where backporting is
likely to be simple. For earlier versions we need to open-
code the fallback algorithm in the driver.
v2:
- Remove check for dgfx. (Matthew Auld)
- Update the xe_dma_buf kunit test for the new strategy (CI)
- Allow dma-buf to pin in current placement (CI)
- Make xe_bo_validate() for pinned bos a NOP.
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5995
Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6")
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v6.9+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> #v1
---
drivers/gpu/drm/xe/tests/xe_bo.c | 2 +-
drivers/gpu/drm/xe/tests/xe_dma_buf.c | 10 +---------
drivers/gpu/drm/xe/xe_bo.c | 16 ++++++++++++----
drivers/gpu/drm/xe/xe_bo.h | 2 +-
drivers/gpu/drm/xe/xe_dma_buf.c | 2 +-
5 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index bb469096d072..7b40cc8be1c9 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -236,7 +236,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
}
xe_bo_lock(external, false);
- err = xe_bo_pin_external(external);
+ err = xe_bo_pin_external(external, false);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo pin err=%pe\n",
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 3c5ad8cc65a0..5baeab6b6fb7 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -85,15 +85,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
return;
}
- /*
- * If on different devices, the exporter is kept in system if
- * possible, saving a migration step as the transfer is just
- * likely as fast from system memory.
- */
- if (params->mem_mask & XE_BO_FLAG_SYSTEM)
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
- else
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
if (params->force_different_devices)
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4faf15d5fa6d..870f43347281 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -188,6 +188,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_TT,
+ .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
+ TTM_PL_FLAG_FALLBACK : 0,
};
*c += 1;
}
@@ -2322,6 +2324,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
/**
* xe_bo_pin_external - pin an external BO
* @bo: buffer object to be pinned
+ * @in_place: Pin in current placement, don't attempt to migrate.
*
* Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
* BO. Unique call compared to xe_bo_pin as this function has it own set of
@@ -2329,7 +2332,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_bo_pin_external(struct xe_bo *bo)
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
{
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -2338,9 +2341,11 @@ int xe_bo_pin_external(struct xe_bo *bo)
xe_assert(xe, xe_bo_is_user(bo));
if (!xe_bo_is_pinned(bo)) {
- err = xe_bo_validate(bo, NULL, false);
- if (err)
- return err;
+ if (!in_place) {
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+ }
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
@@ -2493,6 +2498,9 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
};
int ret;
+ if (xe_bo_is_pinned(bo))
+ return 0;
+
if (vm) {
lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8cce413b5235..cfb1ec266a6d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -200,7 +200,7 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
}
}
-int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place);
int xe_bo_pin(struct xe_bo *bo);
void xe_bo_unpin_external(struct xe_bo *bo);
void xe_bo_unpin(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 346f857f3837..af64baf872ef 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -72,7 +72,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return ret;
}
- ret = xe_bo_pin_external(bo);
+ ret = xe_bo_pin_external(bo, true);
xe_assert(xe, !ret);
return 0;
--
2.50.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 2/3] drm/xe: Allow the pm notifier to continue on failure
[not found] <20250904130614.3212-1-thomas.hellstrom@linux.intel.com>
2025-09-04 13:06 ` [PATCH v3 1/3] drm/xe: Attempt to bring bos back to VRAM after eviction Thomas Hellström
@ 2025-09-04 13:06 ` Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate Thomas Hellström
2 siblings, 0 replies; 5+ messages in thread
From: Thomas Hellström @ 2025-09-04 13:06 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Matthew Auld, Rodrigo Vivi, stable,
Matthew Brost, Maarten Lankhorst
Its actions are opportunistic anyway and will be completed
on device suspend.
Marking as a fix to simplify backporting of the fix
that follows in the series.
v2:
- Keep the runtime pm reference over suspend / hibernate and
document why. (Matt Auld, Rodrigo Vivi):
Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier")
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v6.16+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_pm.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index a2e85030b7f4..bee9aacd82e7 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -308,17 +308,17 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
case PM_SUSPEND_PREPARE:
xe_pm_runtime_get(xe);
err = xe_bo_evict_all_user(xe);
- if (err) {
+ if (err)
drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- break;
- }
err = xe_bo_notifier_prepare_all_pinned(xe);
- if (err) {
+ if (err)
drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- }
+ /*
+ * Keep the runtime pm reference until post hibernation / post suspend to
+ * avoid a runtime suspend interfering with evicted objects or backup
+ * allocations.
+ */
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
@@ -327,9 +327,6 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
break;
}
- if (err)
- return NOTIFY_BAD;
-
return NOTIFY_DONE;
}
--
2.50.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate
[not found] <20250904130614.3212-1-thomas.hellstrom@linux.intel.com>
2025-09-04 13:06 ` [PATCH v3 1/3] drm/xe: Attempt to bring bos back to VRAM after eviction Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 2/3] drm/xe: Allow the pm notifier to continue on failure Thomas Hellström
@ 2025-09-04 13:06 ` Thomas Hellström
2025-09-04 15:22 ` Matthew Auld
2 siblings, 1 reply; 5+ messages in thread
From: Thomas Hellström @ 2025-09-04 13:06 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Matthew Auld, Rodrigo Vivi, stable,
Matthew Brost, Maarten Lankhorst
When the xe pm_notifier evicts for suspend / hibernate, there might be
racing tasks trying to re-validate again. This can lead to suspend taking
excessive time or get stuck in a live-lock. This behaviour becomes
much worse with the fix that actually makes re-validation bring back
bos to VRAM rather than letting them remain in TT.
Prevent that by having exec and the rebind worker waiting for a completion
that is set to block by the pm_notifier before suspend and is signaled
by the pm_notifier after resume / wakeup.
It's probably still possible to craft malicious applications that block
suspending. More work is pending to fix that.
v3:
- Avoid wait_for_completion() in the kernel worker since it could
potentially cause work item flushes from freezable processes to
wait forever. Instead terminate the rebind workers if needed and
re-launch at resume. (Matt Auld)
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4288
Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier")
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v6.16+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_device_types.h | 6 ++++
drivers/gpu/drm/xe/xe_exec.c | 9 ++++++
drivers/gpu/drm/xe/xe_pm.c | 20 ++++++++++++
drivers/gpu/drm/xe/xe_vm.c | 46 +++++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_vm.h | 2 ++
drivers/gpu/drm/xe/xe_vm_types.h | 5 +++
6 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 092004d14db2..1e780f8a2a8c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -507,6 +507,12 @@ struct xe_device {
/** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
struct notifier_block pm_notifier;
+ /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
+ struct completion pm_block;
+ /** @rebind_resume_list: List of wq items to kick on resume. */
+ struct list_head rebind_resume_list;
+ /** @rebind_resume_lock: Lock to protect the rebind_resume_list */
+ struct mutex rebind_resume_lock;
/** @pmt: Support the PMT driver callback interface */
struct {
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 44364c042ad7..374c831e691b 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -237,6 +237,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_unlock_list;
}
+ /*
+ * It's OK to block interruptible here with the vm lock held, since
+ * on task freezing during suspend / hibernate, the call will
+ * return -ERESTARTSYS and the IOCTL will be rerun.
+ */
+ err = wait_for_completion_interruptible(&xe->pm_block);
+ if (err)
+ goto err_unlock_list;
+
vm_exec.vm = &vm->gpuvm;
vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
if (xe_vm_in_lr_mode(vm)) {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index bee9aacd82e7..6d59990ff6ba 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -297,6 +297,18 @@ static u32 vram_threshold_value(struct xe_device *xe)
return DEFAULT_VRAM_THRESHOLD;
}
+static void xe_pm_wake_preempt_workers(struct xe_device *xe)
+{
+ struct list_head *link, *next;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ list_for_each_safe(link, next, &xe->rebind_resume_list) {
+ list_del_init(link);
+ xe_vm_resume_preempt_worker(link);
+ }
+ mutex_unlock(&xe->rebind_resume_lock);
+}
+
static int xe_pm_notifier_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -306,6 +318,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
+ reinit_completion(&xe->pm_block);
xe_pm_runtime_get(xe);
err = xe_bo_evict_all_user(xe);
if (err)
@@ -322,6 +335,8 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
+ complete_all(&xe->pm_block);
+ xe_pm_wake_preempt_workers(xe);
xe_bo_notifier_unprepare_all_pinned(xe);
xe_pm_runtime_put(xe);
break;
@@ -348,6 +363,11 @@ int xe_pm_init(struct xe_device *xe)
if (err)
return err;
+ init_completion(&xe->pm_block);
+ complete_all(&xe->pm_block);
+ mutex_init(&xe->rebind_resume_lock);
+ INIT_LIST_HEAD(&xe->rebind_resume_list);
+
/* For now suspend/resume is only allowed with GuC */
if (!xe_device_uc_enabled(xe))
return 0;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f55f96bb240a..97aad1d53a8c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -394,6 +394,9 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
&vm->rebind_list);
+ if (!try_wait_for_completion(&vm->xe->pm_block))
+ return -EAGAIN;
+
ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
if (ret)
return ret;
@@ -480,6 +483,37 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
+static bool vm_suspend_preempt_worker(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ bool ret = false;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ if (!try_wait_for_completion(&vm->xe->pm_block)) {
+ ret = true;
+ list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
+ }
+ pr_info("Suspending %p\n", vm);
+ mutex_unlock(&xe->rebind_resume_lock);
+
+ return ret;
+}
+
+/**
+ * xe_vm_resume_preempt_worker() - Resume the preempt worker.
+ * @vm: The vm whose preempt worker to resume.
+ *
+ * Resume a preempt worker that was previously suspended by
+ * vm_suspend_preempt_worker().
+ */
+void xe_vm_resume_preempt_worker(struct list_head *link)
+{
+ struct xe_vm *vm = container_of(link, typeof(*vm), preempt.pm_activate_link);
+
+ pr_info("Resuming %p\n", vm);
+ queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
@@ -503,6 +537,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
retry:
+ if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_preempt_worker(vm)) {
+ up_write(&vm->lock);
+ return;
+ }
+
if (xe_vm_userptr_check_repin(vm)) {
err = xe_vm_userptr_pin(vm);
if (err)
@@ -1741,6 +1780,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
if (flags & XE_VM_FLAG_LR_MODE) {
INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
xe_pm_runtime_get_noresume(xe);
+ INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
}
if (flags & XE_VM_FLAG_FAULT_MODE) {
@@ -1922,8 +1962,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_assert(xe, !vm->preempt.num_exec_queues);
xe_vm_close(vm);
- if (xe_vm_in_preempt_fence_mode(vm))
+ if (xe_vm_in_preempt_fence_mode(vm)) {
+ mutex_lock(&xe->rebind_resume_lock);
+ list_del_init(&vm->preempt.pm_activate_link);
+ mutex_unlock(&xe->rebind_resume_lock);
flush_work(&vm->preempt.rebind_work);
+ }
if (xe_vm_in_fault_mode(vm))
xe_svm_close(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index b3e5bec0fa58..f2639794278b 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -281,6 +281,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
struct xe_exec_queue *q, u64 addr,
enum xe_cache_level cache_lvl);
+void xe_vm_resume_preempt_worker(struct list_head *link);
+
/**
* xe_vm_resv() - Return's the vm's reservation object
* @vm: The vm
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index b5108d010786..e1a786db5f89 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -338,6 +338,11 @@ struct xe_vm {
* BOs
*/
struct work_struct rebind_work;
+ /**
+ * @preempt.pm_activate_link: Link to list of rebind workers to be
+ * kicked on resume.
+ */
+ struct list_head pm_activate_link;
} preempt;
/** @um: unified memory state */
--
2.50.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate
2025-09-04 13:06 ` [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate Thomas Hellström
@ 2025-09-04 15:22 ` Matthew Auld
2025-09-04 15:30 ` Thomas Hellström
0 siblings, 1 reply; 5+ messages in thread
From: Matthew Auld @ 2025-09-04 15:22 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: Rodrigo Vivi, stable, Matthew Brost, Maarten Lankhorst
On 04/09/2025 14:06, Thomas Hellström wrote:
> When the xe pm_notifier evicts for suspend / hibernate, there might be
> racing tasks trying to re-validate again. This can lead to suspend taking
> excessive time or get stuck in a live-lock. This behaviour becomes
> much worse with the fix that actually makes re-validation bring back
> bos to VRAM rather than letting them remain in TT.
>
> Prevent that by having exec and the rebind worker waiting for a completion
> that is set to block by the pm_notifier before suspend and is signaled
> by the pm_notifier after resume / wakeup.
>
> It's probably still possible to craft malicious applications that block
> suspending. More work is pending to fix that.
>
> v3:
> - Avoid wait_for_completion() in the kernel worker since it could
> potentially cause work item flushes from freezable processes to
> wait forever. Instead terminate the rebind workers if needed and
> re-launch at resume. (Matt Auld)
>
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4288
> Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier")
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: <stable@vger.kernel.org> # v6.16+
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_device_types.h | 6 ++++
> drivers/gpu/drm/xe/xe_exec.c | 9 ++++++
> drivers/gpu/drm/xe/xe_pm.c | 20 ++++++++++++
> drivers/gpu/drm/xe/xe_vm.c | 46 +++++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_vm.h | 2 ++
> drivers/gpu/drm/xe/xe_vm_types.h | 5 +++
> 6 files changed, 87 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 092004d14db2..1e780f8a2a8c 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -507,6 +507,12 @@ struct xe_device {
>
> /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
> struct notifier_block pm_notifier;
> + /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
> + struct completion pm_block;
> + /** @rebind_resume_list: List of wq items to kick on resume. */
> + struct list_head rebind_resume_list;
> + /** @rebind_resume_lock: Lock to protect the rebind_resume_list */
> + struct mutex rebind_resume_lock;
>
> /** @pmt: Support the PMT driver callback interface */
> struct {
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 44364c042ad7..374c831e691b 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -237,6 +237,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto err_unlock_list;
> }
>
> + /*
> + * It's OK to block interruptible here with the vm lock held, since
> + * on task freezing during suspend / hibernate, the call will
> + * return -ERESTARTSYS and the IOCTL will be rerun.
> + */
> + err = wait_for_completion_interruptible(&xe->pm_block);
> + if (err)
> + goto err_unlock_list;
> +
> vm_exec.vm = &vm->gpuvm;
> vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
> if (xe_vm_in_lr_mode(vm)) {
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index bee9aacd82e7..6d59990ff6ba 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -297,6 +297,18 @@ static u32 vram_threshold_value(struct xe_device *xe)
> return DEFAULT_VRAM_THRESHOLD;
> }
>
> +static void xe_pm_wake_preempt_workers(struct xe_device *xe)
> +{
> + struct list_head *link, *next;
> +
> + mutex_lock(&xe->rebind_resume_lock);
> + list_for_each_safe(link, next, &xe->rebind_resume_list) {
> + list_del_init(link);
> + xe_vm_resume_preempt_worker(link);
> + }
> + mutex_unlock(&xe->rebind_resume_lock);
> +}
> +
> static int xe_pm_notifier_callback(struct notifier_block *nb,
> unsigned long action, void *data)
> {
> @@ -306,6 +318,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
> switch (action) {
> case PM_HIBERNATION_PREPARE:
> case PM_SUSPEND_PREPARE:
> + reinit_completion(&xe->pm_block);
> xe_pm_runtime_get(xe);
> err = xe_bo_evict_all_user(xe);
> if (err)
> @@ -322,6 +335,8 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
> break;
> case PM_POST_HIBERNATION:
> case PM_POST_SUSPEND:
> + complete_all(&xe->pm_block);
> + xe_pm_wake_preempt_workers(xe);
> xe_bo_notifier_unprepare_all_pinned(xe);
> xe_pm_runtime_put(xe);
> break;
> @@ -348,6 +363,11 @@ int xe_pm_init(struct xe_device *xe)
> if (err)
> return err;
>
> + init_completion(&xe->pm_block);
> + complete_all(&xe->pm_block);
> + mutex_init(&xe->rebind_resume_lock);
err = drmm_mutex_init(&xe->rebind_resume_lock);
?
> + INIT_LIST_HEAD(&xe->rebind_resume_list);
> +
> /* For now suspend/resume is only allowed with GuC */
> if (!xe_device_uc_enabled(xe))
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index f55f96bb240a..97aad1d53a8c 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -394,6 +394,9 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
> list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
> &vm->rebind_list);
>
> + if (!try_wait_for_completion(&vm->xe->pm_block))
> + return -EAGAIN;
> +
> ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
> if (ret)
> return ret;
> @@ -480,6 +483,37 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
> return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
> }
>
> +static bool vm_suspend_preempt_worker(struct xe_vm *vm)
> +{
> + struct xe_device *xe = vm->xe;
> + bool ret = false;
> +
> + mutex_lock(&xe->rebind_resume_lock);
> + if (!try_wait_for_completion(&vm->xe->pm_block)) {
> + ret = true;
> + list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
> + }
> + pr_info("Suspending %p\n", vm);
> + mutex_unlock(&xe->rebind_resume_lock);
> +
> + return ret;
> +}
> +
> +/**
> + * xe_vm_resume_preempt_worker() - Resume the preempt worker.
> + * @vm: The vm whose preempt worker to resume.
> + *
> + * Resume a preempt worker that was previously suspended by
> + * vm_suspend_preempt_worker().
> + */
> +void xe_vm_resume_preempt_worker(struct list_head *link)
I guess should use vm arg here?
> +{
> + struct xe_vm *vm = container_of(link, typeof(*vm), preempt.pm_activate_link);
> +
> + pr_info("Resuming %p\n", vm);
> + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
> +}
> +
> static void preempt_rebind_work_func(struct work_struct *w)
> {
> struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
> @@ -503,6 +537,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
> }
>
> retry:
> + if (!try_wait_for_completion(&vm->xe->)) && vm_suspend_preempt_worker(vm)) {
> + up_write(&vm->lock);
> + return;
> + }
> +
> if (xe_vm_userptr_check_repin(vm)) {
> err = xe_vm_userptr_pin(vm);
> if (err)
> @@ -1741,6 +1780,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
> if (flags & XE_VM_FLAG_LR_MODE) {
> INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
> xe_pm_runtime_get_noresume(xe);
> + INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
> }
>
> if (flags & XE_VM_FLAG_FAULT_MODE) {
> @@ -1922,8 +1962,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> xe_assert(xe, !vm->preempt.num_exec_queues);
>
> xe_vm_close(vm);
> - if (xe_vm_in_preempt_fence_mode(vm))
> + if (xe_vm_in_preempt_fence_mode(vm)) {
> + mutex_lock(&xe->rebind_resume_lock);
> + list_del_init(&vm->preempt.pm_activate_link);
> + mutex_unlock(&xe->rebind_resume_lock);
> flush_work(&vm->preempt.rebind_work);
> + }
> if (xe_vm_in_fault_mode(vm))
> xe_svm_close(vm);
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index b3e5bec0fa58..f2639794278b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -281,6 +281,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
> struct xe_exec_queue *q, u64 addr,
> enum xe_cache_level cache_lvl);
>
> +void xe_vm_resume_preempt_worker(struct list_head *link);
> +
> /**
> * xe_vm_resv() - Return's the vm's reservation object
> * @vm: The vm
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index b5108d010786..e1a786db5f89 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -338,6 +338,11 @@ struct xe_vm {
> * BOs
> */
> struct work_struct rebind_work;
> + /**
> + * @preempt.pm_activate_link: Link to list of rebind workers to be
> + * kicked on resume.
> + */
> + struct list_head pm_activate_link;
> } preempt;
>
> /** @um: unified memory state */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate
2025-09-04 15:22 ` Matthew Auld
@ 2025-09-04 15:30 ` Thomas Hellström
0 siblings, 0 replies; 5+ messages in thread
From: Thomas Hellström @ 2025-09-04 15:30 UTC (permalink / raw)
To: Matthew Auld, intel-xe
Cc: Rodrigo Vivi, stable, Matthew Brost, Maarten Lankhorst
On Thu, 2025-09-04 at 16:22 +0100, Matthew Auld wrote:
> On 04/09/2025 14:06, Thomas Hellström wrote:
> > When the xe pm_notifier evicts for suspend / hibernate, there might
> > be
> > racing tasks trying to re-validate again. This can lead to suspend
> > taking
> > excessive time or get stuck in a live-lock. This behaviour becomes
> > much worse with the fix that actually makes re-validation bring
> > back
> > bos to VRAM rather than letting them remain in TT.
> >
> > Prevent that by having exec and the rebind worker waiting for a
> > completion
> > that is set to block by the pm_notifier before suspend and is
> > signaled
> > by the pm_notifier after resume / wakeup.
> >
> > It's probably still possible to craft malicious applications that
> > block
> > suspending. More work is pending to fix that.
> >
> > v3:
> > - Avoid wait_for_completion() in the kernel worker since it could
> > potentially cause work item flushes from freezable processes to
> > wait forever. Instead terminate the rebind workers if needed and
> > re-launch at resume. (Matt Auld)
> >
> > Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4288
> > Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier")
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: <stable@vger.kernel.org> # v6.16+
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_device_types.h | 6 ++++
> > drivers/gpu/drm/xe/xe_exec.c | 9 ++++++
> > drivers/gpu/drm/xe/xe_pm.c | 20 ++++++++++++
> > drivers/gpu/drm/xe/xe_vm.c | 46
> > +++++++++++++++++++++++++++-
> > drivers/gpu/drm/xe/xe_vm.h | 2 ++
> > drivers/gpu/drm/xe/xe_vm_types.h | 5 +++
> > 6 files changed, 87 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h
> > b/drivers/gpu/drm/xe/xe_device_types.h
> > index 092004d14db2..1e780f8a2a8c 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -507,6 +507,12 @@ struct xe_device {
> >
> > /** @pm_notifier: Our PM notifier to perform actions in
> > response to various PM events. */
> > struct notifier_block pm_notifier;
> > + /** @pm_block: Completion to block validating tasks on
> > suspend / hibernate prepare */
> > + struct completion pm_block;
> > + /** @rebind_resume_list: List of wq items to kick on
> > resume. */
> > + struct list_head rebind_resume_list;
> > + /** @rebind_resume_lock: Lock to protect the
> > rebind_resume_list */
> > + struct mutex rebind_resume_lock;
> >
> > /** @pmt: Support the PMT driver callback interface */
> > struct {
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c
> > b/drivers/gpu/drm/xe/xe_exec.c
> > index 44364c042ad7..374c831e691b 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -237,6 +237,15 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> > goto err_unlock_list;
> > }
> >
> > + /*
> > + * It's OK to block interruptible here with the vm lock
> > held, since
> > + * on task freezing during suspend / hibernate, the call
> > will
> > + * return -ERESTARTSYS and the IOCTL will be rerun.
> > + */
> > + err = wait_for_completion_interruptible(&xe->pm_block);
> > + if (err)
> > + goto err_unlock_list;
> > +
> > vm_exec.vm = &vm->gpuvm;
> > vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
> > if (xe_vm_in_lr_mode(vm)) {
> > diff --git a/drivers/gpu/drm/xe/xe_pm.c
> > b/drivers/gpu/drm/xe/xe_pm.c
> > index bee9aacd82e7..6d59990ff6ba 100644
> > --- a/drivers/gpu/drm/xe/xe_pm.c
> > +++ b/drivers/gpu/drm/xe/xe_pm.c
> > @@ -297,6 +297,18 @@ static u32 vram_threshold_value(struct
> > xe_device *xe)
> > return DEFAULT_VRAM_THRESHOLD;
> > }
> >
> > +static void xe_pm_wake_preempt_workers(struct xe_device *xe)
> > +{
> > + struct list_head *link, *next;
> > +
> > + mutex_lock(&xe->rebind_resume_lock);
> > + list_for_each_safe(link, next, &xe->rebind_resume_list) {
> > + list_del_init(link);
> > + xe_vm_resume_preempt_worker(link);
> > + }
> > + mutex_unlock(&xe->rebind_resume_lock);
> > +}
> > +
> > static int xe_pm_notifier_callback(struct notifier_block *nb,
> > unsigned long action, void
> > *data)
> > {
> > @@ -306,6 +318,7 @@ static int xe_pm_notifier_callback(struct
> > notifier_block *nb,
> > switch (action) {
> > case PM_HIBERNATION_PREPARE:
> > case PM_SUSPEND_PREPARE:
> > + reinit_completion(&xe->pm_block);
> > xe_pm_runtime_get(xe);
> > err = xe_bo_evict_all_user(xe);
> > if (err)
> > @@ -322,6 +335,8 @@ static int xe_pm_notifier_callback(struct
> > notifier_block *nb,
> > break;
> > case PM_POST_HIBERNATION:
> > case PM_POST_SUSPEND:
> > + complete_all(&xe->pm_block);
> > + xe_pm_wake_preempt_workers(xe);
> > xe_bo_notifier_unprepare_all_pinned(xe);
> > xe_pm_runtime_put(xe);
> > break;
> > @@ -348,6 +363,11 @@ int xe_pm_init(struct xe_device *xe)
> > if (err)
> > return err;
> >
> > + init_completion(&xe->pm_block);
> > + complete_all(&xe->pm_block);
> > + mutex_init(&xe->rebind_resume_lock);
>
> err = drmm_mutex_init(&xe->rebind_resume_lock);
Sure.
>
> ?
>
> > + INIT_LIST_HEAD(&xe->rebind_resume_list);
> > +
> > /* For now suspend/resume is only allowed with GuC */
> > if (!xe_device_uc_enabled(xe))
> > return 0;
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > b/drivers/gpu/drm/xe/xe_vm.c
> > index f55f96bb240a..97aad1d53a8c 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -394,6 +394,9 @@ static int xe_gpuvm_validate(struct
> > drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
> > list_move_tail(&gpuva_to_vma(gpuva)-
> > >combined_links.rebind,
> > &vm->rebind_list);
> >
> > + if (!try_wait_for_completion(&vm->xe->pm_block))
> > + return -EAGAIN;
> > +
> > ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
> > if (ret)
> > return ret;
> > @@ -480,6 +483,37 @@ static int xe_preempt_work_begin(struct
> > drm_exec *exec, struct xe_vm *vm,
> > return xe_vm_validate_rebind(vm, exec, vm-
> > >preempt.num_exec_queues);
> > }
> >
> > +static bool vm_suspend_preempt_worker(struct xe_vm *vm)
> > +{
> > + struct xe_device *xe = vm->xe;
> > + bool ret = false;
> > +
> > + mutex_lock(&xe->rebind_resume_lock);
> > + if (!try_wait_for_completion(&vm->xe->pm_block)) {
> > + ret = true;
> > + list_move_tail(&vm->preempt.pm_activate_link, &xe-
> > >rebind_resume_list);
> > + }
> > + pr_info("Suspending %p\n", vm);
> > + mutex_unlock(&xe->rebind_resume_lock);
> > +
> > + return ret;
> > +}
> > +
> > +/**
> > + * xe_vm_resume_preempt_worker() - Resume the preempt worker.
> > + * @vm: The vm whose preempt worker to resume.
> > + *
> > + * Resume a preempt worker that was previously suspended by
> > + * vm_suspend_preempt_worker().
> > + */
> > +void xe_vm_resume_preempt_worker(struct list_head *link)
>
> I guess should use vm arg here?
Yes I tried to avoid downcasting from within xe_pm.c but since we need
to include xe_vm.h anyway, I might as well do that.
Thanks for reviewing!
/Thomas
>
> > +{
> > + struct xe_vm *vm = container_of(link, typeof(*vm),
> > preempt.pm_activate_link);
> > +
> > + pr_info("Resuming %p\n", vm);
> > + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
> > +}
> > +
> > static void preempt_rebind_work_func(struct work_struct *w)
> > {
> > struct xe_vm *vm = container_of(w, struct xe_vm,
> > preempt.rebind_work);
> > @@ -503,6 +537,11 @@ static void preempt_rebind_work_func(struct
> > work_struct *w)
> > }
> >
> > retry:
> > + if (!try_wait_for_completion(&vm->xe->)) &&
> > vm_suspend_preempt_worker(vm)) {
> > + up_write(&vm->lock);
> > + return;
> > + }
> > +
> > if (xe_vm_userptr_check_repin(vm)) {
> > err = xe_vm_userptr_pin(vm);
> > if (err)
> > @@ -1741,6 +1780,7 @@ struct xe_vm *xe_vm_create(struct xe_device
> > *xe, u32 flags, struct xe_file *xef)
> > if (flags & XE_VM_FLAG_LR_MODE) {
> > INIT_WORK(&vm->preempt.rebind_work,
> > preempt_rebind_work_func);
> > xe_pm_runtime_get_noresume(xe);
> > + INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
> > }
> >
> > if (flags & XE_VM_FLAG_FAULT_MODE) {
> > @@ -1922,8 +1962,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> > xe_assert(xe, !vm->preempt.num_exec_queues);
> >
> > xe_vm_close(vm);
> > - if (xe_vm_in_preempt_fence_mode(vm))
> > + if (xe_vm_in_preempt_fence_mode(vm)) {
> > + mutex_lock(&xe->rebind_resume_lock);
> > + list_del_init(&vm->preempt.pm_activate_link);
> > + mutex_unlock(&xe->rebind_resume_lock);
> > flush_work(&vm->preempt.rebind_work);
> > + }
> > if (xe_vm_in_fault_mode(vm))
> > xe_svm_close(vm);
> >
> > diff --git a/drivers/gpu/drm/xe/xe_vm.h
> > b/drivers/gpu/drm/xe/xe_vm.h
> > index b3e5bec0fa58..f2639794278b 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.h
> > +++ b/drivers/gpu/drm/xe/xe_vm.h
> > @@ -281,6 +281,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct
> > xe_vm *vm, struct xe_bo *bo,
> > struct xe_exec_queue *q,
> > u64 addr,
> > enum xe_cache_level
> > cache_lvl);
> >
> > +void xe_vm_resume_preempt_worker(struct list_head *link);
> > +
> > /**
> > * xe_vm_resv() - Return's the vm's reservation object
> > * @vm: The vm
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index b5108d010786..e1a786db5f89 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -338,6 +338,11 @@ struct xe_vm {
> > * BOs
> > */
> > struct work_struct rebind_work;
> > + /**
> > + * @preempt.pm_activate_link: Link to list of
> > rebind workers to be
> > + * kicked on resume.
> > + */
> > + struct list_head pm_activate_link;
> > } preempt;
> >
> > /** @um: unified memory state */
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-09-04 15:30 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20250904130614.3212-1-thomas.hellstrom@linux.intel.com>
2025-09-04 13:06 ` [PATCH v3 1/3] drm/xe: Attempt to bring bos back to VRAM after eviction Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 2/3] drm/xe: Allow the pm notifier to continue on failure Thomas Hellström
2025-09-04 13:06 ` [PATCH v3 3/3] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate Thomas Hellström
2025-09-04 15:22 ` Matthew Auld
2025-09-04 15:30 ` Thomas Hellström
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox