* [PATCH 0/3] drm/ttm, drm/xe: Consolidate the Buffer Object LRU walks
@ 2025-06-13 15:18 Thomas Hellström
2025-06-13 15:18 ` [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor Thomas Hellström
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Thomas Hellström @ 2025-06-13 15:18 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, airlied, Matthew Brost,
Matthew Auld, Christian König
This is a deferred task from the Xe bo shrinker addition.
TTM currently has two separate ways of doing buffer object LRU
walks, one of which is exposed to drivers. Both have their benefits,
but we shouldn't be implementing the complex bo locking in
two different places. So implement the ttm_lru_walk_for_evict()
function in terms of the guarded iteration that is also exposed to
drivers.
This means that when we get to implement locking using drm_exec,
we only need to do that in a single place.
Add ticketlocking support to the guarded iteration and modify
the iteration arguments.
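As a rough sketch, the driver-facing shape after this series is the
following (names taken from the patches below; ctx, ticket, man and
the per-bo handling are assumed to come from the caller):

    struct ttm_lru_walk_arg arg = {
        .ctx = ctx,
        .ticket = ticket,   /* NULL: only trylocking is attempted */
    };
    struct ttm_bo_lru_cursor curs;
    struct ttm_buffer_object *bo;

    ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, bo) {
        /* bo is locked and refcounted here; evict / shrink it. */
    }
    /* With ticketlocking, the loop may exit with an error pointer. */
    if (IS_ERR(bo))
        return PTR_ERR(bo);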
Thomas Hellström (3):
drm/ttm: Use a struct for the common part of struct ttm_lru_walk and
struct ttm_bo_lru_cursor.
drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor
initialization
drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded
LRU iteration
drivers/gpu/drm/ttm/ttm_bo.c | 24 ++--
drivers/gpu/drm/ttm/ttm_bo_util.c | 186 ++++++++++++------------------
drivers/gpu/drm/xe/xe_shrinker.c | 8 +-
include/drm/ttm/ttm_bo.h | 44 ++++---
4 files changed, 122 insertions(+), 140 deletions(-)
--
2.49.0
* [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor.
2025-06-13 15:18 [PATCH 0/3] drm/ttm, drm/xe: Consolidate the Buffer Object LRU walks Thomas Hellström
@ 2025-06-13 15:18 ` Thomas Hellström
2025-06-16 13:04 ` Christian König
2025-06-13 15:18 ` [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization Thomas Hellström
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
2 siblings, 1 reply; 10+ messages in thread
From: Thomas Hellström @ 2025-06-13 15:18 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, airlied, Matthew Brost,
Matthew Auld, Christian König
Let the locking functions take the new struct ttm_lru_walk_arg
as argument in order for them to be easily used from both
types of walk.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
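In short, after this patch the common walk arguments nest under an
embedded struct ttm_lru_walk_arg, and the locking helpers take a
pointer to that struct so that both walk variants can share them.
Roughly, distilled from the hunks below (my_walk_ops is just a
placeholder):

    struct ttm_lru_walk walk = {
        .ops = &my_walk_ops,
        .arg = {
            .ctx = ctx,
            .ticket = ticket,
            .trylock_only = false,
        },
    };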
drivers/gpu/drm/ttm/ttm_bo.c | 24 ++++++++++++++----------
drivers/gpu/drm/ttm/ttm_bo_util.c | 26 ++++++++++++++------------
include/drm/ttm/ttm_bo.h | 23 ++++++++++++++---------
3 files changed, 42 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0f874f1e2526..eca486dff976 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -525,11 +525,11 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
return 0;
if (bo->deleted) {
- lret = ttm_bo_wait_ctx(bo, walk->ctx);
+ lret = ttm_bo_wait_ctx(bo, walk->arg.ctx);
if (!lret)
ttm_bo_cleanup_memtype_use(bo);
} else {
- lret = ttm_bo_evict(bo, walk->ctx);
+ lret = ttm_bo_evict(bo, walk->arg.ctx);
}
if (lret)
@@ -565,8 +565,10 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
struct ttm_bo_evict_walk evict_walk = {
.walk = {
.ops = &ttm_evict_walk_ops,
- .ctx = ctx,
- .ticket = ticket,
+ .arg = {
+ .ctx = ctx,
+ .ticket = ticket,
+ }
},
.place = place,
.evictor = evictor,
@@ -575,7 +577,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
};
s64 lret;
- evict_walk.walk.trylock_only = true;
+ evict_walk.walk.arg.trylock_only = true;
lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
/* One more attempt if we hit low limit? */
@@ -589,12 +591,12 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
/* Reset low limit */
evict_walk.try_low = evict_walk.hit_low = false;
/* If ticket-locking, repeat while making progress. */
- evict_walk.walk.trylock_only = false;
+ evict_walk.walk.arg.trylock_only = false;
retry:
do {
/* The walk may clear the evict_walk.walk.ticket field */
- evict_walk.walk.ticket = ticket;
+ evict_walk.walk.arg.ticket = ticket;
evict_walk.evicted = 0;
lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
} while (!lret && evict_walk.evicted);
@@ -1105,7 +1107,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
struct ttm_place place = {.mem_type = bo->resource->mem_type};
struct ttm_bo_swapout_walk *swapout_walk =
container_of(walk, typeof(*swapout_walk), walk);
- struct ttm_operation_ctx *ctx = walk->ctx;
+ struct ttm_operation_ctx *ctx = walk->arg.ctx;
s64 ret;
/*
@@ -1216,8 +1218,10 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
struct ttm_bo_swapout_walk swapout_walk = {
.walk = {
.ops = &ttm_swap_ops,
- .ctx = ctx,
- .trylock_only = true,
+ .arg = {
+ .ctx = ctx,
+ .trylock_only = true,
+ },
},
.gfp_flags = gfp_flags,
};
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index b78365dc1fed..600145cdeb9c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -771,10 +771,12 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
return ret;
}
-static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx,
+static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
struct ttm_buffer_object *bo,
bool *needs_unlock)
{
+ struct ttm_operation_ctx *ctx = arg->ctx;
+
*needs_unlock = false;
if (dma_resv_trylock(bo->base.resv)) {
@@ -790,17 +792,17 @@ static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx,
return false;
}
-static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
+static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
struct ttm_buffer_object *bo,
bool *needs_unlock)
{
struct dma_resv *resv = bo->base.resv;
int ret;
- if (walk->ctx->interruptible)
- ret = dma_resv_lock_interruptible(resv, walk->ticket);
+ if (arg->ctx->interruptible)
+ ret = dma_resv_lock_interruptible(resv, arg->ticket);
else
- ret = dma_resv_lock(resv, walk->ticket);
+ ret = dma_resv_lock(resv, arg->ticket);
if (!ret) {
*needs_unlock = true;
@@ -810,7 +812,7 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
* after waiting for the ticketlock, revert back to
* trylocking for this walk.
*/
- walk->ticket = NULL;
+ arg->ticket = NULL;
} else if (ret == -EDEADLK) {
/* Caller needs to exit the ww transaction. */
ret = -ENOSPC;
@@ -877,10 +879,10 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
* since if we do it the other way around, and the trylock fails,
* we need to drop the lru lock to put the bo.
*/
- if (ttm_lru_walk_trylock(walk->ctx, bo, &bo_needs_unlock))
+ if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
bo_locked = true;
- else if (!walk->ticket || walk->ctx->no_wait_gpu ||
- walk->trylock_only)
+ else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
+ walk->arg.trylock_only)
continue;
if (!ttm_bo_get_unless_zero(bo)) {
@@ -893,7 +895,7 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
lret = 0;
if (!bo_locked)
- lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock);
+ lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);
/*
* Note that in between the release of the lru lock and the
@@ -970,7 +972,7 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
{
memset(curs, 0, sizeof(*curs));
ttm_resource_cursor_init(&curs->res_curs, man);
- curs->ctx = ctx;
+ curs->arg.ctx = ctx;
return curs;
}
@@ -981,7 +983,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
{
struct ttm_buffer_object *bo = res->bo;
- if (!ttm_lru_walk_trylock(curs->ctx, bo, &curs->needs_unlock))
+ if (!ttm_lru_walk_trylock(&curs->arg, bo, &curs->needs_unlock))
return NULL;
if (!ttm_bo_get_unless_zero(bo)) {
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8ad6e2713625..4e52283e5db1 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -207,11 +207,9 @@ struct ttm_lru_walk_ops {
};
/**
- * struct ttm_lru_walk - Structure describing a LRU walk.
+ * struct ttm_lru_walk_arg - Common part for the variants of BO LRU walk.
*/
-struct ttm_lru_walk {
- /** @ops: Pointer to the ops structure. */
- const struct ttm_lru_walk_ops *ops;
+struct ttm_lru_walk_arg {
/** @ctx: Pointer to the struct ttm_operation_ctx. */
struct ttm_operation_ctx *ctx;
/** @ticket: The struct ww_acquire_ctx if any. */
@@ -219,6 +217,16 @@ struct ttm_lru_walk {
/** @trylock_only: Only use trylock for locking. */
bool trylock_only;
};
+
+/**
+ * struct ttm_lru_walk - Structure describing a LRU walk.
+ */
+struct ttm_lru_walk {
+ /** @ops: Pointer to the ops structure. */
+ const struct ttm_lru_walk_ops *ops;
+ /** @arg: Common bo LRU walk arguments. */
+ struct ttm_lru_walk_arg arg;
+};
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
struct ttm_resource_manager *man, s64 target);
@@ -466,11 +474,6 @@ int ttm_bo_populate(struct ttm_buffer_object *bo,
struct ttm_bo_lru_cursor {
/** @res_curs: Embedded struct ttm_resource_cursor. */
struct ttm_resource_cursor res_curs;
- /**
- * @ctx: The struct ttm_operation_ctx used while looping.
- * governs the locking mode.
- */
- struct ttm_operation_ctx *ctx;
/**
* @bo: Buffer object pointer if a buffer object is refcounted,
* NULL otherwise.
@@ -481,6 +484,8 @@ struct ttm_bo_lru_cursor {
* unlock before the next iteration or after loop exit.
*/
bool needs_unlock;
+ /** @arg: Common BO LRU walk arguments. */
+ struct ttm_lru_walk_arg arg;
};
void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
--
2.49.0
* [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization
2025-06-13 15:18 [PATCH 0/3] drm/ttm, drm/xe: Consolidate the Buffer Object LRU walks Thomas Hellström
2025-06-13 15:18 ` [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor Thomas Hellström
@ 2025-06-13 15:18 ` Thomas Hellström
2025-06-16 13:15 ` Christian König
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
2 siblings, 1 reply; 10+ messages in thread
From: Thomas Hellström @ 2025-06-13 15:18 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, airlied, Matthew Brost,
Matthew Auld, Christian König
Instead of the struct ttm_operation_ctx, pass a struct ttm_lru_walk_arg
to enable us to easily extend the walk functionality, and to
implement ttm_lru_walk_for_evict() using the guarded LRU iteration.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
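For existing users of the guarded iteration, the change boils down to
wrapping the operation context in an on-stack argument struct, roughly
(mirroring the xe_shrinker hunk below):

    struct ttm_lru_walk_arg arg = { .ctx = ctx };

    ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) {
        /* per-bo shrinking as before */
    }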
drivers/gpu/drm/ttm/ttm_bo_util.c | 10 +++++-----
drivers/gpu/drm/xe/xe_shrinker.c | 3 ++-
include/drm/ttm/ttm_bo.h | 16 ++++++++--------
3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 600145cdeb9c..62b76abac578 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -956,11 +956,11 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
* ttm_bo_lru_cursor_init() - Initialize a struct ttm_bo_lru_cursor
* @curs: The ttm_bo_lru_cursor to initialize.
* @man: The ttm resource_manager whose LRU lists to iterate over.
- * @ctx: The ttm_operation_ctx to govern the locking.
+ * @arg: The ttm_lru_walk_arg to govern the walk.
*
* Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
* or prelocked buffer objects are available as detailed by
- * @ctx::resv and @ctx::allow_res_evict. Ticketlocking is not
+ * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
* supported.
*
* Return: Pointer to @curs. The function does not fail.
@@ -968,11 +968,11 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
struct ttm_bo_lru_cursor *
ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
struct ttm_resource_manager *man,
- struct ttm_operation_ctx *ctx)
+ struct ttm_lru_walk_arg *arg)
{
memset(curs, 0, sizeof(*curs));
ttm_resource_cursor_init(&curs->res_curs, man);
- curs->arg.ctx = ctx;
+ curs->arg = arg;
return curs;
}
@@ -983,7 +983,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
{
struct ttm_buffer_object *bo = res->bo;
- if (!ttm_lru_walk_trylock(&curs->arg, bo, &curs->needs_unlock))
+ if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
return NULL;
if (!ttm_bo_get_unless_zero(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 125c836e0ee4..f8a1129da2c3 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -66,11 +66,12 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
struct ttm_bo_lru_cursor curs;
struct ttm_buffer_object *ttm_bo;
+ struct ttm_lru_walk_arg arg = {.ctx = ctx};
if (!man || !man->use_tt)
continue;
- ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) {
+ ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) {
if (!ttm_bo_shrink_suitable(ttm_bo, ctx))
continue;
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 4e52283e5db1..8f04fa48b332 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -484,8 +484,8 @@ struct ttm_bo_lru_cursor {
* unlock before the next iteration or after loop exit.
*/
bool needs_unlock;
- /** @arg: Common BO LRU walk arguments. */
- struct ttm_lru_walk_arg arg;
+ /** @arg: Pointer to common BO LRU walk arguments. */
+ struct ttm_lru_walk_arg *arg;
};
void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
@@ -493,7 +493,7 @@ void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
struct ttm_bo_lru_cursor *
ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
struct ttm_resource_manager *man,
- struct ttm_operation_ctx *ctx);
+ struct ttm_lru_walk_arg *arg);
struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs);
@@ -504,9 +504,9 @@ struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
*/
DEFINE_CLASS(ttm_bo_lru_cursor, struct ttm_bo_lru_cursor *,
if (_T) {ttm_bo_lru_cursor_fini(_T); },
- ttm_bo_lru_cursor_init(curs, man, ctx),
+ ttm_bo_lru_cursor_init(curs, man, arg),
struct ttm_bo_lru_cursor *curs, struct ttm_resource_manager *man,
- struct ttm_operation_ctx *ctx);
+ struct ttm_lru_walk_arg *arg);
static inline void *
class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
{ return *_T; }
@@ -517,7 +517,7 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
* resources on LRU lists.
* @_cursor: struct ttm_bo_lru_cursor to use for the iteration.
* @_man: The resource manager whose LRU lists to iterate over.
- * @_ctx: The struct ttm_operation_context to govern the @_bo locking.
+ * @_arg: The struct ttm_lru_walk_arg to govern the LRU walk.
* @_bo: The struct ttm_buffer_object pointer pointing to the buffer object
* for the current iteration.
*
@@ -530,8 +530,8 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
* example a return or break statement. Exiting the loop will also unlock
* (if needed) and unreference @_bo.
*/
-#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _ctx, _bo) \
- scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _ctx) \
+#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \
+ scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \
for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \
(_bo) = ttm_bo_lru_cursor_next(_cursor))
--
2.49.0
* [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
2025-06-13 15:18 [PATCH 0/3] drm/ttm, drm/xe: Consolidate the Buffer Object LRU walks Thomas Hellström
2025-06-13 15:18 ` [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor Thomas Hellström
2025-06-13 15:18 ` [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization Thomas Hellström
@ 2025-06-13 15:18 ` Thomas Hellström
2025-06-13 18:39 ` kernel test robot
` (2 more replies)
2 siblings, 3 replies; 10+ messages in thread
From: Thomas Hellström @ 2025-06-13 15:18 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, dri-devel, airlied, Matthew Brost,
Matthew Auld, Christian König
To avoid duplicating the tricky bo locking implementation,
implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration.
To facilitate this, support ticketlocking from the guarded bo LRU
iteration.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
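Since the guarded iteration can now ticketlock, the loop variable may
be an ERR_PTR() on loop exit, so callers that pass a ticket are
expected to check for that. A sketch of the resulting pattern, as used
by ttm_lru_walk_for_evict() below:

    ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
        lret = walk->ops->process_bo(walk, bo);
        /* accumulate progress; break when the target is reached */
    }
    if (IS_ERR(bo))
        return PTR_ERR(bo);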
drivers/gpu/drm/ttm/ttm_bo_util.c | 166 ++++++++++++------------------
drivers/gpu/drm/xe/xe_shrinker.c | 7 +-
include/drm/ttm/ttm_bo.h | 9 +-
3 files changed, 76 insertions(+), 106 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 62b76abac578..9bc17ea1adb2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -821,12 +821,6 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
return ret;
}
-static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
-{
- if (locked)
- dma_resv_unlock(bo->base.resv);
-}
-
/**
* ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
* valid items.
@@ -861,64 +855,21 @@ static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
struct ttm_resource_manager *man, s64 target)
{
- struct ttm_resource_cursor cursor;
- struct ttm_resource *res;
+ struct ttm_bo_lru_cursor cursor;
+ struct ttm_buffer_object *bo;
s64 progress = 0;
s64 lret;
- spin_lock(&bdev->lru_lock);
- ttm_resource_cursor_init(&cursor, man);
- ttm_resource_manager_for_each_res(&cursor, res) {
- struct ttm_buffer_object *bo = res->bo;
- bool bo_needs_unlock = false;
- bool bo_locked = false;
- int mem_type;
-
- /*
- * Attempt a trylock before taking a reference on the bo,
- * since if we do it the other way around, and the trylock fails,
- * we need to drop the lru lock to put the bo.
- */
- if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
- bo_locked = true;
- else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
- walk->arg.trylock_only)
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- ttm_lru_walk_unlock(bo, bo_needs_unlock);
- continue;
- }
-
- mem_type = res->mem_type;
- spin_unlock(&bdev->lru_lock);
-
- lret = 0;
- if (!bo_locked)
- lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);
-
- /*
- * Note that in between the release of the lru lock and the
- * ticketlock, the bo may have switched resource,
- * and also memory type, since the resource may have been
- * freed and allocated again with a different memory type.
- * In that case, just skip it.
- */
- if (!lret && bo->resource && bo->resource->mem_type == mem_type)
- lret = walk->ops->process_bo(walk, bo);
-
- ttm_lru_walk_unlock(bo, bo_needs_unlock);
- ttm_bo_put(bo);
+ ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
+ lret = walk->ops->process_bo(walk, bo);
if (lret == -EBUSY || lret == -EALREADY)
lret = 0;
progress = (lret < 0) ? lret : progress + lret;
-
- spin_lock(&bdev->lru_lock);
if (progress < 0 || progress >= target)
break;
}
- ttm_resource_cursor_fini(&cursor);
- spin_unlock(&bdev->lru_lock);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
return progress;
}
@@ -958,10 +909,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
* @man: The ttm resource_manager whose LRU lists to iterate over.
* @arg: The ttm_lru_walk_arg to govern the walk.
*
- * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
- * or prelocked buffer objects are available as detailed by
- * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
- * supported.
+ * Initialize a struct ttm_bo_lru_cursor.
*
* Return: Pointer to @curs. The function does not fail.
*/
@@ -979,21 +927,65 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
static struct ttm_buffer_object *
-ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *curs)
+__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool first)
{
- struct ttm_buffer_object *bo = res->bo;
+ spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
+ struct ttm_resource *res = NULL;
+ struct ttm_buffer_object *bo;
+ struct ttm_lru_walk_arg *arg = curs->arg;
- if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
- return NULL;
+ ttm_bo_lru_cursor_cleanup_bo(curs);
- if (!ttm_bo_get_unless_zero(bo)) {
- if (curs->needs_unlock)
- dma_resv_unlock(bo->base.resv);
- return NULL;
+ spin_lock(lru_lock);
+ for (;;) {
+ int mem_type, ret;
+ bool bo_locked = false;
+
+ if (first) {
+ res = ttm_resource_manager_first(&curs->res_curs);
+ first = false;
+ } else {
+ res = ttm_resource_manager_next(&curs->res_curs);
+ }
+ if (!res)
+ break;
+
+ bo = res->bo;
+ if (ttm_lru_walk_trylock(arg, bo, &curs->needs_unlock))
+ bo_locked = true;
+ else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
+ continue;
+
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (curs->needs_unlock)
+ dma_resv_unlock(bo->base.resv);
+ continue;
+ }
+
+ mem_type = res->mem_type;
+ spin_unlock(lru_lock);
+ if (!bo_locked)
+ ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);
+ /*
+ * Note that in between the release of the lru lock and the
+ * ticketlock, the bo may have switched resource,
+ * and also memory type, since the resource may have been
+ * freed and allocated again with a different memory type.
+ * In that case, just skip it.
+ */
+ curs->bo = bo;
+ if (!ret && bo->resource && bo->resource->mem_type == mem_type)
+ return bo;
+
+ ttm_bo_lru_cursor_cleanup_bo(curs);
+ if (ret)
+ return ERR_PTR(ret);
+
+ spin_lock(lru_lock);
}
- curs->bo = bo;
- return bo;
+ spin_unlock(lru_lock);
+ return res ? bo : NULL;
}
/**
@@ -1007,25 +999,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
*/
struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
{
- spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
- struct ttm_resource *res = NULL;
- struct ttm_buffer_object *bo;
-
- ttm_bo_lru_cursor_cleanup_bo(curs);
-
- spin_lock(lru_lock);
- for (;;) {
- res = ttm_resource_manager_next(&curs->res_curs);
- if (!res)
- break;
-
- bo = ttm_bo_from_res_reserved(res, curs);
- if (bo)
- break;
- }
-
- spin_unlock(lru_lock);
- return res ? bo : NULL;
+ return __ttm_bo_lru_cursor_next(curs, false);
}
EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
@@ -1039,21 +1013,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
*/
struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs)
{
- spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
- struct ttm_buffer_object *bo;
- struct ttm_resource *res;
-
- spin_lock(lru_lock);
- res = ttm_resource_manager_first(&curs->res_curs);
- if (!res) {
- spin_unlock(lru_lock);
- return NULL;
- }
-
- bo = ttm_bo_from_res_reserved(res, curs);
- spin_unlock(lru_lock);
-
- return bo ? bo : ttm_bo_lru_cursor_next(curs);
+ return __ttm_bo_lru_cursor_next(curs, true);
}
EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index f8a1129da2c3..1c3c04d52f55 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -66,7 +66,10 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
struct ttm_bo_lru_cursor curs;
struct ttm_buffer_object *ttm_bo;
- struct ttm_lru_walk_arg arg = {.ctx = ctx};
+ struct ttm_lru_walk_arg arg = {
+ .ctx = ctx,
+ .trylock_only = true,
+ };
if (!man || !man->use_tt)
continue;
@@ -83,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
if (*scanned >= to_scan)
break;
}
+ /* Trylocks should never error, just fail. */
+ xe_assert(xe, !IS_ERR(ttm_bo));
}
return freed;
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8f04fa48b332..d3a85d76aaff 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -529,10 +529,15 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
* up at looping termination, even if terminated prematurely by, for
* example a return or break statement. Exiting the loop will also unlock
* (if needed) and unreference @_bo.
+ *
+ * Return: If locking of a bo returns an error, then iteration is terminated
+ * and @_bo is set to a corresponding error pointer. It's illegal to
+ * dereference @_bo after loop exit.
*/
#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \
scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \
- for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \
- (_bo) = ttm_bo_lru_cursor_next(_cursor))
+ for ((_bo) = ttm_bo_lru_cursor_first(_cursor); \
+ !IS_ERR_OR_NULL(_bo); \
+ (_bo) = ttm_bo_lru_cursor_next(_cursor))
#endif
--
2.49.0
* Re: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
@ 2025-06-13 18:39 ` kernel test robot
2025-06-16 13:23 ` Christian König
2025-06-18 17:43 ` Dan Carpenter
2 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2025-06-13 18:39 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: llvm, oe-kbuild-all, Thomas Hellström, dri-devel, airlied,
Matthew Brost, Matthew Auld, Christian König
Hi Thomas,
kernel test robot noticed the following build warnings:
[auto build test WARNING on drm-xe/drm-xe-next]
[also build test WARNING on linus/master v6.16-rc1 next-20250613]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Use-a-struct-for-the-common-part-of-struct-ttm_lru_walk-and-struct-ttm_bo_lru_cursor/20250613-232106
base: https://gitlab.freedesktop.org/drm/xe/kernel.git drm-xe-next
patch link: https://lore.kernel.org/r/20250613151824.178650-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
config: i386-buildonly-randconfig-005-20250613 (https://download.01.org/0day-ci/archive/20250614/202506140238.KCnSVmrU-lkp@intel.com/config)
compiler: clang version 20.1.2 (https://github.com/llvm/llvm-project 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250614/202506140238.KCnSVmrU-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506140238.KCnSVmrU-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_bo_util.c:965:7: warning: variable 'ret' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
965 | if (!bo_locked)
| ^~~~~~~~~~
drivers/gpu/drm/ttm/ttm_bo_util.c:975:8: note: uninitialized use occurs here
975 | if (!ret && bo->resource && bo->resource->mem_type == mem_type)
| ^~~
drivers/gpu/drm/ttm/ttm_bo_util.c:965:3: note: remove the 'if' if its condition is always true
965 | if (!bo_locked)
| ^~~~~~~~~~~~~~~
966 | ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);
drivers/gpu/drm/ttm/ttm_bo_util.c:939:20: note: initialize the variable 'ret' to silence this warning
939 | int mem_type, ret;
| ^
| = 0
1 warning generated.
vim +965 drivers/gpu/drm/ttm/ttm_bo_util.c
926
927 static struct ttm_buffer_object *
928 __ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool first)
929 {
930 spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
931 struct ttm_resource *res = NULL;
932 struct ttm_buffer_object *bo;
933 struct ttm_lru_walk_arg *arg = curs->arg;
934
935 ttm_bo_lru_cursor_cleanup_bo(curs);
936
937 spin_lock(lru_lock);
938 for (;;) {
939 int mem_type, ret;
940 bool bo_locked = false;
941
942 if (first) {
943 res = ttm_resource_manager_first(&curs->res_curs);
944 first = false;
945 } else {
946 res = ttm_resource_manager_next(&curs->res_curs);
947 }
948 if (!res)
949 break;
950
951 bo = res->bo;
952 if (ttm_lru_walk_trylock(arg, bo, &curs->needs_unlock))
953 bo_locked = true;
954 else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
955 continue;
956
957 if (!ttm_bo_get_unless_zero(bo)) {
958 if (curs->needs_unlock)
959 dma_resv_unlock(bo->base.resv);
960 continue;
961 }
962
963 mem_type = res->mem_type;
964 spin_unlock(lru_lock);
> 965 if (!bo_locked)
966 ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);
967 /*
968 * Note that in between the release of the lru lock and the
969 * ticketlock, the bo may have switched resource,
970 * and also memory type, since the resource may have been
971 * freed and allocated again with a different memory type.
972 * In that case, just skip it.
973 */
974 curs->bo = bo;
975 if (!ret && bo->resource && bo->resource->mem_type == mem_type)
976 return bo;
977
978 ttm_bo_lru_cursor_cleanup_bo(curs);
979 if (ret)
980 return ERR_PTR(ret);
981
982 spin_lock(lru_lock);
983 }
984
985 spin_unlock(lru_lock);
986 return res ? bo : NULL;
987 }
988
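The pre-refactoring ttm_lru_walk_for_evict() initialized the
corresponding variable to 0 before the conditional ticketlock, so a
minimal way to address this warning (sketch only, untested) would be
the analogous initialization:

    int mem_type, ret = 0;
    bool bo_locked = false;

    /* ... trylock attempt and refcounting as in the context above ... */

    if (!bo_locked)
        ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);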
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
* Re: [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor.
2025-06-13 15:18 ` [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor Thomas Hellström
@ 2025-06-16 13:04 ` Christian König
0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2025-06-16 13:04 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: dri-devel, airlied, Matthew Brost, Matthew Auld
On 6/13/25 17:18, Thomas Hellström wrote:
> Let the locking functions take the new struct ttm_lru_walk_arg
> as argument in order for them to be easily used from both
> types of walk.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
We could even clean that up further I think, but for now Reviewed-by: Christian König <christian.koenig@amd.com>
Regards,
Christian.
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 24 ++++++++++++++----------
> drivers/gpu/drm/ttm/ttm_bo_util.c | 26 ++++++++++++++------------
> include/drm/ttm/ttm_bo.h | 23 ++++++++++++++---------
> 3 files changed, 42 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 0f874f1e2526..eca486dff976 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -525,11 +525,11 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
> return 0;
>
> if (bo->deleted) {
> - lret = ttm_bo_wait_ctx(bo, walk->ctx);
> + lret = ttm_bo_wait_ctx(bo, walk->arg.ctx);
> if (!lret)
> ttm_bo_cleanup_memtype_use(bo);
> } else {
> - lret = ttm_bo_evict(bo, walk->ctx);
> + lret = ttm_bo_evict(bo, walk->arg.ctx);
> }
>
> if (lret)
> @@ -565,8 +565,10 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
> struct ttm_bo_evict_walk evict_walk = {
> .walk = {
> .ops = &ttm_evict_walk_ops,
> - .ctx = ctx,
> - .ticket = ticket,
> + .arg = {
> + .ctx = ctx,
> + .ticket = ticket,
> + }
> },
> .place = place,
> .evictor = evictor,
> @@ -575,7 +577,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
> };
> s64 lret;
>
> - evict_walk.walk.trylock_only = true;
> + evict_walk.walk.arg.trylock_only = true;
> lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
>
> /* One more attempt if we hit low limit? */
> @@ -589,12 +591,12 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
> /* Reset low limit */
> evict_walk.try_low = evict_walk.hit_low = false;
> /* If ticket-locking, repeat while making progress. */
> - evict_walk.walk.trylock_only = false;
> + evict_walk.walk.arg.trylock_only = false;
>
> retry:
> do {
> /* The walk may clear the evict_walk.walk.ticket field */
> - evict_walk.walk.ticket = ticket;
> + evict_walk.walk.arg.ticket = ticket;
> evict_walk.evicted = 0;
> lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
> } while (!lret && evict_walk.evicted);
> @@ -1105,7 +1107,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
> struct ttm_place place = {.mem_type = bo->resource->mem_type};
> struct ttm_bo_swapout_walk *swapout_walk =
> container_of(walk, typeof(*swapout_walk), walk);
> - struct ttm_operation_ctx *ctx = walk->ctx;
> + struct ttm_operation_ctx *ctx = walk->arg.ctx;
> s64 ret;
>
> /*
> @@ -1216,8 +1218,10 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> struct ttm_bo_swapout_walk swapout_walk = {
> .walk = {
> .ops = &ttm_swap_ops,
> - .ctx = ctx,
> - .trylock_only = true,
> + .arg = {
> + .ctx = ctx,
> + .trylock_only = true,
> + },
> },
> .gfp_flags = gfp_flags,
> };
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index b78365dc1fed..600145cdeb9c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -771,10 +771,12 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
> return ret;
> }
>
> -static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx,
> +static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
> struct ttm_buffer_object *bo,
> bool *needs_unlock)
> {
> + struct ttm_operation_ctx *ctx = arg->ctx;
> +
> *needs_unlock = false;
>
> if (dma_resv_trylock(bo->base.resv)) {
> @@ -790,17 +792,17 @@ static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx,
> return false;
> }
>
> -static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
> +static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
> struct ttm_buffer_object *bo,
> bool *needs_unlock)
> {
> struct dma_resv *resv = bo->base.resv;
> int ret;
>
> - if (walk->ctx->interruptible)
> - ret = dma_resv_lock_interruptible(resv, walk->ticket);
> + if (arg->ctx->interruptible)
> + ret = dma_resv_lock_interruptible(resv, arg->ticket);
> else
> - ret = dma_resv_lock(resv, walk->ticket);
> + ret = dma_resv_lock(resv, arg->ticket);
>
> if (!ret) {
> *needs_unlock = true;
> @@ -810,7 +812,7 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
> * after waiting for the ticketlock, revert back to
> * trylocking for this walk.
> */
> - walk->ticket = NULL;
> + arg->ticket = NULL;
> } else if (ret == -EDEADLK) {
> /* Caller needs to exit the ww transaction. */
> ret = -ENOSPC;
> @@ -877,10 +879,10 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
> * since if we do it the other way around, and the trylock fails,
> * we need to drop the lru lock to put the bo.
> */
> - if (ttm_lru_walk_trylock(walk->ctx, bo, &bo_needs_unlock))
> + if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
> bo_locked = true;
> - else if (!walk->ticket || walk->ctx->no_wait_gpu ||
> - walk->trylock_only)
> + else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
> + walk->arg.trylock_only)
> continue;
>
> if (!ttm_bo_get_unless_zero(bo)) {
> @@ -893,7 +895,7 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
>
> lret = 0;
> if (!bo_locked)
> - lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock);
> + lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);
>
> /*
> * Note that in between the release of the lru lock and the
> @@ -970,7 +972,7 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
> {
> memset(curs, 0, sizeof(*curs));
> ttm_resource_cursor_init(&curs->res_curs, man);
> - curs->ctx = ctx;
> + curs->arg.ctx = ctx;
>
> return curs;
> }
> @@ -981,7 +983,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
> {
> struct ttm_buffer_object *bo = res->bo;
>
> - if (!ttm_lru_walk_trylock(curs->ctx, bo, &curs->needs_unlock))
> + if (!ttm_lru_walk_trylock(&curs->arg, bo, &curs->needs_unlock))
> return NULL;
>
> if (!ttm_bo_get_unless_zero(bo)) {
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 8ad6e2713625..4e52283e5db1 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -207,11 +207,9 @@ struct ttm_lru_walk_ops {
> };
>
> /**
> - * struct ttm_lru_walk - Structure describing a LRU walk.
> + * struct ttm_lru_walk_arg - Common part for the variants of BO LRU walk.
> */
> -struct ttm_lru_walk {
> - /** @ops: Pointer to the ops structure. */
> - const struct ttm_lru_walk_ops *ops;
> +struct ttm_lru_walk_arg {
> /** @ctx: Pointer to the struct ttm_operation_ctx. */
> struct ttm_operation_ctx *ctx;
> /** @ticket: The struct ww_acquire_ctx if any. */
> @@ -219,6 +217,16 @@ struct ttm_lru_walk {
> /** @trylock_only: Only use trylock for locking. */
> bool trylock_only;
> };
> +
> +/**
> + * struct ttm_lru_walk - Structure describing a LRU walk.
> + */
> +struct ttm_lru_walk {
> + /** @ops: Pointer to the ops structure. */
> + const struct ttm_lru_walk_ops *ops;
> + /** @arg: Common bo LRU walk arguments. */
> + struct ttm_lru_walk_arg arg;
> +};
>
> s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
> struct ttm_resource_manager *man, s64 target);
> @@ -466,11 +474,6 @@ int ttm_bo_populate(struct ttm_buffer_object *bo,
> struct ttm_bo_lru_cursor {
> /** @res_curs: Embedded struct ttm_resource_cursor. */
> struct ttm_resource_cursor res_curs;
> - /**
> - * @ctx: The struct ttm_operation_ctx used while looping.
> - * governs the locking mode.
> - */
> - struct ttm_operation_ctx *ctx;
> /**
> * @bo: Buffer object pointer if a buffer object is refcounted,
> * NULL otherwise.
> @@ -481,6 +484,8 @@ struct ttm_bo_lru_cursor {
> * unlock before the next iteration or after loop exit.
> */
> bool needs_unlock;
> + /** @arg: Common BO LRU walk arguments. */
> + struct ttm_lru_walk_arg arg;
> };
>
> void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
* Re: [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization
2025-06-13 15:18 ` [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization Thomas Hellström
@ 2025-06-16 13:15 ` Christian König
0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2025-06-16 13:15 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: dri-devel, airlied, Matthew Brost, Matthew Auld
On 6/13/25 17:18, Thomas Hellström wrote:
> Instead of the struct ttm_operation_ctx, pass a struct ttm_lru_walk_arg
> to enable us to easily extend the walk functionality, and to
> implement ttm_lru_walk_for_evict() using the guarded LRU iteration.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo_util.c | 10 +++++-----
> drivers/gpu/drm/xe/xe_shrinker.c | 3 ++-
> include/drm/ttm/ttm_bo.h | 16 ++++++++--------
> 3 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 600145cdeb9c..62b76abac578 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -956,11 +956,11 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
> * ttm_bo_lru_cursor_init() - Initialize a struct ttm_bo_lru_cursor
> * @curs: The ttm_bo_lru_cursor to initialize.
> * @man: The ttm resource_manager whose LRU lists to iterate over.
> - * @ctx: The ttm_operation_ctx to govern the locking.
> + * @arg: The ttm_lru_walk_arg to govern the walk.
> *
> * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
> * or prelocked buffer objects are available as detailed by
> - * @ctx::resv and @ctx::allow_res_evict. Ticketlocking is not
> + * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
> * supported.
> *
> * Return: Pointer to @curs. The function does not fail.
> @@ -968,11 +968,11 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
> struct ttm_bo_lru_cursor *
> ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
> struct ttm_resource_manager *man,
> - struct ttm_operation_ctx *ctx)
> + struct ttm_lru_walk_arg *arg)
> {
> memset(curs, 0, sizeof(*curs));
> ttm_resource_cursor_init(&curs->res_curs, man);
> - curs->arg.ctx = ctx;
> + curs->arg = arg;
>
> return curs;
> }
> @@ -983,7 +983,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
> {
> struct ttm_buffer_object *bo = res->bo;
>
> - if (!ttm_lru_walk_trylock(&curs->arg, bo, &curs->needs_unlock))
> + if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
> return NULL;
>
> if (!ttm_bo_get_unless_zero(bo)) {
> diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
> index 125c836e0ee4..f8a1129da2c3 100644
> --- a/drivers/gpu/drm/xe/xe_shrinker.c
> +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> @@ -66,11 +66,12 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
> struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
> struct ttm_bo_lru_cursor curs;
> struct ttm_buffer_object *ttm_bo;
> + struct ttm_lru_walk_arg arg = {.ctx = ctx};
>
> if (!man || !man->use_tt)
> continue;
>
> - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) {
> + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) {
> if (!ttm_bo_shrink_suitable(ttm_bo, ctx))
> continue;
>
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 4e52283e5db1..8f04fa48b332 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -484,8 +484,8 @@ struct ttm_bo_lru_cursor {
> * unlock before the next iteration or after loop exit.
> */
> bool needs_unlock;
> - /** @arg: Common BO LRU walk arguments. */
> - struct ttm_lru_walk_arg arg;
> + /** @arg: Pointer to common BO LRU walk arguments. */
> + struct ttm_lru_walk_arg *arg;
> };
>
> void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
> @@ -493,7 +493,7 @@ void ttm_bo_lru_cursor_fini(struct ttm_bo_lru_cursor *curs);
> struct ttm_bo_lru_cursor *
> ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
> struct ttm_resource_manager *man,
> - struct ttm_operation_ctx *ctx);
> + struct ttm_lru_walk_arg *arg);
>
> struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs);
>
> @@ -504,9 +504,9 @@ struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
> */
> DEFINE_CLASS(ttm_bo_lru_cursor, struct ttm_bo_lru_cursor *,
> if (_T) {ttm_bo_lru_cursor_fini(_T); },
> - ttm_bo_lru_cursor_init(curs, man, ctx),
> + ttm_bo_lru_cursor_init(curs, man, arg),
> struct ttm_bo_lru_cursor *curs, struct ttm_resource_manager *man,
> - struct ttm_operation_ctx *ctx);
> + struct ttm_lru_walk_arg *arg);
> static inline void *
> class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> { return *_T; }
> @@ -517,7 +517,7 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> * resources on LRU lists.
> * @_cursor: struct ttm_bo_lru_cursor to use for the iteration.
> * @_man: The resource manager whose LRU lists to iterate over.
> - * @_ctx: The struct ttm_operation_context to govern the @_bo locking.
> + * @_arg: The struct ttm_lru_walk_arg to govern the LRU walk.
> * @_bo: The struct ttm_buffer_object pointer pointing to the buffer object
> * for the current iteration.
> *
> @@ -530,8 +530,8 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> * example a return or break statement. Exiting the loop will also unlock
> * (if needed) and unreference @_bo.
> */
> -#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _ctx, _bo) \
> - scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _ctx) \
> +#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \
> + scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \
> for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \
> (_bo) = ttm_bo_lru_cursor_next(_cursor))
>
* Re: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
2025-06-13 18:39 ` kernel test robot
@ 2025-06-16 13:23 ` Christian König
2025-06-16 15:29 ` Thomas Hellström
2025-06-18 17:43 ` Dan Carpenter
2 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2025-06-16 13:23 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: dri-devel, airlied, Matthew Brost, Matthew Auld
On 6/13/25 17:18, Thomas Hellström wrote:
> To avoid duplicating the tricky bo locking implementation,
> implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration.
>
> To facilitate this, support ticketlocking from the guarded bo LRU
> iteration.
That looks mostly identical to a patch I have in my drm_exec branch.
A few questions below.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo_util.c | 166 ++++++++++++------------------
> drivers/gpu/drm/xe/xe_shrinker.c | 7 +-
> include/drm/ttm/ttm_bo.h | 9 +-
> 3 files changed, 76 insertions(+), 106 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 62b76abac578..9bc17ea1adb2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -821,12 +821,6 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
> return ret;
> }
>
> -static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
> -{
> - if (locked)
> - dma_resv_unlock(bo->base.resv);
> -}
> -
> /**
> * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
> * valid items.
> @@ -861,64 +855,21 @@ static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
> s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
> struct ttm_resource_manager *man, s64 target)
> {
> - struct ttm_resource_cursor cursor;
> - struct ttm_resource *res;
> + struct ttm_bo_lru_cursor cursor;
> + struct ttm_buffer_object *bo;
> s64 progress = 0;
> s64 lret;
>
> - spin_lock(&bdev->lru_lock);
> - ttm_resource_cursor_init(&cursor, man);
> - ttm_resource_manager_for_each_res(&cursor, res) {
> - struct ttm_buffer_object *bo = res->bo;
> - bool bo_needs_unlock = false;
> - bool bo_locked = false;
> - int mem_type;
> -
> - /*
> - * Attempt a trylock before taking a reference on the bo,
> - * since if we do it the other way around, and the trylock fails,
> - * we need to drop the lru lock to put the bo.
> - */
> - if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
> - bo_locked = true;
> - else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
> - walk->arg.trylock_only)
> - continue;
> -
> - if (!ttm_bo_get_unless_zero(bo)) {
> - ttm_lru_walk_unlock(bo, bo_needs_unlock);
> - continue;
> - }
> -
> - mem_type = res->mem_type;
> - spin_unlock(&bdev->lru_lock);
> -
> - lret = 0;
> - if (!bo_locked)
> - lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);
> -
> - /*
> - * Note that in between the release of the lru lock and the
> - * ticketlock, the bo may have switched resource,
> - * and also memory type, since the resource may have been
> - * freed and allocated again with a different memory type.
> - * In that case, just skip it.
> - */
> - if (!lret && bo->resource && bo->resource->mem_type == mem_type)
> - lret = walk->ops->process_bo(walk, bo);
> -
> - ttm_lru_walk_unlock(bo, bo_needs_unlock);
> - ttm_bo_put(bo);
> + ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
> + lret = walk->ops->process_bo(walk, bo);
> if (lret == -EBUSY || lret == -EALREADY)
> lret = 0;
> progress = (lret < 0) ? lret : progress + lret;
> -
> - spin_lock(&bdev->lru_lock);
> if (progress < 0 || progress >= target)
> break;
> }
> - ttm_resource_cursor_fini(&cursor);
> - spin_unlock(&bdev->lru_lock);
> + if (IS_ERR(bo))
> + return PTR_ERR(bo);
>
> return progress;
> }
> @@ -958,10 +909,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
> * @man: The ttm resource_manager whose LRU lists to iterate over.
> * @arg: The ttm_lru_walk_arg to govern the walk.
> *
> - * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
> - * or prelocked buffer objects are available as detailed by
> - * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
> - * supported.
> + * Initialize a struct ttm_bo_lru_cursor.
> *
> * Return: Pointer to @curs. The function does not fail.
> */
> @@ -979,21 +927,65 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
> EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
>
> static struct ttm_buffer_object *
> -ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *curs)
> +__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool first)
Giving first as bool parameter here looks really ugly. Isn't there any other way to do this?
> {
> - struct ttm_buffer_object *bo = res->bo;
> + spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> + struct ttm_resource *res = NULL;
> + struct ttm_buffer_object *bo;
> + struct ttm_lru_walk_arg *arg = curs->arg;
>
> - if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
> - return NULL;
> + ttm_bo_lru_cursor_cleanup_bo(curs);
>
> - if (!ttm_bo_get_unless_zero(bo)) {
> - if (curs->needs_unlock)
> - dma_resv_unlock(bo->base.resv);
> - return NULL;
> + spin_lock(lru_lock);
> + for (;;) {
> + int mem_type, ret;
> + bool bo_locked = false;
> +
> + if (first) {
> + res = ttm_resource_manager_first(&curs->res_curs);
> + first = false;
> + } else {
> + res = ttm_resource_manager_next(&curs->res_curs);
> + }
> + if (!res)
> + break;
> +
> + bo = res->bo;
> + if (ttm_lru_walk_trylock(arg, bo, &curs->needs_unlock))
Could/should we move needs_unlock into arg as well?
Apart from that looks good to me.
Regards,
Christian.
> + bo_locked = true;
> + else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
> + continue;
> +
> + if (!ttm_bo_get_unless_zero(bo)) {
> + if (curs->needs_unlock)
> + dma_resv_unlock(bo->base.resv);
> + continue;
> + }
> +
> + mem_type = res->mem_type;
> + spin_unlock(lru_lock);
> + if (!bo_locked)
> + ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);
> + /*
> + * Note that in between the release of the lru lock and the
> + * ticketlock, the bo may have switched resource,
> + * and also memory type, since the resource may have been
> + * freed and allocated again with a different memory type.
> + * In that case, just skip it.
> + */
> + curs->bo = bo;
> + if (!ret && bo->resource && bo->resource->mem_type == mem_type)
> + return bo;
> +
> + ttm_bo_lru_cursor_cleanup_bo(curs);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + spin_lock(lru_lock);
> }
>
> - curs->bo = bo;
> - return bo;
> + spin_unlock(lru_lock);
> + return res ? bo : NULL;
> }
>
> /**
> @@ -1007,25 +999,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
> */
> struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
> {
> - spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> - struct ttm_resource *res = NULL;
> - struct ttm_buffer_object *bo;
> -
> - ttm_bo_lru_cursor_cleanup_bo(curs);
> -
> - spin_lock(lru_lock);
> - for (;;) {
> - res = ttm_resource_manager_next(&curs->res_curs);
> - if (!res)
> - break;
> -
> - bo = ttm_bo_from_res_reserved(res, curs);
> - if (bo)
> - break;
> - }
> -
> - spin_unlock(lru_lock);
> - return res ? bo : NULL;
> + return __ttm_bo_lru_cursor_next(curs, false);
> }
> EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
>
> @@ -1039,21 +1013,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
> */
> struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs)
> {
> - spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
> - struct ttm_buffer_object *bo;
> - struct ttm_resource *res;
> -
> - spin_lock(lru_lock);
> - res = ttm_resource_manager_first(&curs->res_curs);
> - if (!res) {
> - spin_unlock(lru_lock);
> - return NULL;
> - }
> -
> - bo = ttm_bo_from_res_reserved(res, curs);
> - spin_unlock(lru_lock);
> -
> - return bo ? bo : ttm_bo_lru_cursor_next(curs);
> + return __ttm_bo_lru_cursor_next(curs, true);
> }
> EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
>
> diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
> index f8a1129da2c3..1c3c04d52f55 100644
> --- a/drivers/gpu/drm/xe/xe_shrinker.c
> +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> @@ -66,7 +66,10 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
> struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
> struct ttm_bo_lru_cursor curs;
> struct ttm_buffer_object *ttm_bo;
> - struct ttm_lru_walk_arg arg = {.ctx = ctx};
> + struct ttm_lru_walk_arg arg = {
> + .ctx = ctx,
> + .trylock_only = true,
> + };
>
> if (!man || !man->use_tt)
> continue;
> @@ -83,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
> if (*scanned >= to_scan)
> break;
> }
> + /* Trylocks should never error, just fail. */
> + xe_assert(xe, !IS_ERR(ttm_bo));
> }
>
> return freed;
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 8f04fa48b332..d3a85d76aaff 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -529,10 +529,15 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> * up at looping termination, even if terminated prematurely by, for
> * example a return or break statement. Exiting the loop will also unlock
> * (if needed) and unreference @_bo.
> + *
> + * Return: If locking of a bo returns an error, then iteration is terminated
> + * and @_bo is set to a corresponding error pointer. It's illegal to
> + * dereference @_bo after loop exit.
> */
> #define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo) \
> scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg) \
> - for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo); \
> - (_bo) = ttm_bo_lru_cursor_next(_cursor))
> + for ((_bo) = ttm_bo_lru_cursor_first(_cursor); \
> + !IS_ERR_OR_NULL(_bo); \
> + (_bo) = ttm_bo_lru_cursor_next(_cursor))
>
> #endif
* Re: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
2025-06-16 13:23 ` Christian König
@ 2025-06-16 15:29 ` Thomas Hellström
0 siblings, 0 replies; 10+ messages in thread
From: Thomas Hellström @ 2025-06-16 15:29 UTC (permalink / raw)
To: Christian König, intel-xe
Cc: dri-devel, airlied, Matthew Brost, Matthew Auld
On Mon, 2025-06-16 at 15:23 +0200, Christian König wrote:
> On 6/13/25 17:18, Thomas Hellström wrote:
> > To avoid duplicating the tricky bo locking implementation,
> > implement ttm_lru_walk_for_evict() using the guarded bo LRU
> > iteration.
> >
> > To facilitate this, support ticketlocking from the guarded bo LRU
> > iteration.
>
> That looks mostly identical to a patch I have in my drm_exec branch.
>
> A few questions below.
>
> >
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/ttm/ttm_bo_util.c | 166 ++++++++++++--------------
> > ----
> > drivers/gpu/drm/xe/xe_shrinker.c | 7 +-
> > include/drm/ttm/ttm_bo.h | 9 +-
> > 3 files changed, 76 insertions(+), 106 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index 62b76abac578..9bc17ea1adb2 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -821,12 +821,6 @@ static int ttm_lru_walk_ticketlock(struct
> > ttm_lru_walk_arg *arg,
> > return ret;
> > }
> >
> > -static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool
> > locked)
> > -{
> > - if (locked)
> > - dma_resv_unlock(bo->base.resv);
> > -}
> > -
> > /**
> > * ttm_lru_walk_for_evict() - Perform a LRU list walk, with
> > actions taken on
> > * valid items.
> > @@ -861,64 +855,21 @@ static void ttm_lru_walk_unlock(struct
> > ttm_buffer_object *bo, bool locked)
> > s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct
> > ttm_device *bdev,
> > struct ttm_resource_manager *man, s64
> > target)
> > {
> > - struct ttm_resource_cursor cursor;
> > - struct ttm_resource *res;
> > + struct ttm_bo_lru_cursor cursor;
> > + struct ttm_buffer_object *bo;
> > s64 progress = 0;
> > s64 lret;
> >
> > - spin_lock(&bdev->lru_lock);
> > - ttm_resource_cursor_init(&cursor, man);
> > - ttm_resource_manager_for_each_res(&cursor, res) {
> > - struct ttm_buffer_object *bo = res->bo;
> > - bool bo_needs_unlock = false;
> > - bool bo_locked = false;
> > - int mem_type;
> > -
> > - /*
> > - * Attempt a trylock before taking a reference on
> > the bo,
> > - * since if we do it the other way around, and the
> > trylock fails,
> > - * we need to drop the lru lock to put the bo.
> > - */
> > - if (ttm_lru_walk_trylock(&walk->arg, bo,
> > &bo_needs_unlock))
> > - bo_locked = true;
> > - else if (!walk->arg.ticket || walk->arg.ctx-
> > >no_wait_gpu ||
> > - walk->arg.trylock_only)
> > - continue;
> > -
> > - if (!ttm_bo_get_unless_zero(bo)) {
> > - ttm_lru_walk_unlock(bo, bo_needs_unlock);
> > - continue;
> > - }
> > -
> > - mem_type = res->mem_type;
> > - spin_unlock(&bdev->lru_lock);
> > -
> > - lret = 0;
> > - if (!bo_locked)
> > - lret = ttm_lru_walk_ticketlock(&walk->arg,
> > bo, &bo_needs_unlock);
> > -
> > - /*
> > - * Note that in between the release of the lru
> > lock and the
> > - * ticketlock, the bo may have switched resource,
> > - * and also memory type, since the resource may
> > have been
> > - * freed and allocated again with a different
> > memory type.
> > - * In that case, just skip it.
> > - */
> > - if (!lret && bo->resource && bo->resource-
> > >mem_type == mem_type)
> > - lret = walk->ops->process_bo(walk, bo);
> > -
> > - ttm_lru_walk_unlock(bo, bo_needs_unlock);
> > - ttm_bo_put(bo);
> > + ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk-
> > >arg, bo) {
> > + lret = walk->ops->process_bo(walk, bo);
> > if (lret == -EBUSY || lret == -EALREADY)
> > lret = 0;
> > progress = (lret < 0) ? lret : progress + lret;
> > -
> > - spin_lock(&bdev->lru_lock);
> > if (progress < 0 || progress >= target)
> > break;
> > }
> > - ttm_resource_cursor_fini(&cursor);
> > - spin_unlock(&bdev->lru_lock);
> > + if (IS_ERR(bo))
> > + return PTR_ERR(bo);
> >
> > return progress;
> > }
> > @@ -958,10 +909,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
> > * @man: The ttm resource_manager whose LRU lists to iterate over.
> > * @arg: The ttm_lru_walk_arg to govern the walk.
> > *
> > - * Initialize a struct ttm_bo_lru_cursor. Currently only
> > trylocking
> > - * or prelocked buffer objects are available as detailed by
> > - * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is
> > not
> > - * supported.
> > + * Initialize a struct ttm_bo_lru_cursor.
> > *
> > * Return: Pointer to @curs. The function does not fail.
> > */
> > @@ -979,21 +927,65 @@ ttm_bo_lru_cursor_init(struct
> > ttm_bo_lru_cursor *curs,
> > EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
> >
> > static struct ttm_buffer_object *
> > -ttm_bo_from_res_reserved(struct ttm_resource *res, struct
> > ttm_bo_lru_cursor *curs)
> > +__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool
> > first)
>
> Giving first as bool parameter here looks really ugly. Isn't there
> any other way to do this?
I agree. The previous way of doing this with a bo_from_res() helper
isn't a good fit since, when ticketlocking, we'd like to be called with
the spinlock held but return without it. That's really confusing to
both readers and static analyzers, so it's better avoided.
We could look at curs->bo and see if it's NULL, but I think the bool
argument is more readable than that. If you have a better idea, I'm of
course open to changing this.
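For reference, a rough and untested sketch of that curs->bo variant
(not part of this series; it assumes ttm_bo_lru_cursor_init() leaves
curs->bo NULL):

static struct ttm_buffer_object *
__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
{
	/* Infer "first" from whether the cursor has handed out a bo yet. */
	bool first = !curs->bo;

	ttm_bo_lru_cursor_cleanup_bo(curs);
	...
	if (first) {
		res = ttm_resource_manager_first(&curs->res_curs);
		first = false;
	} else {
		res = ttm_resource_manager_next(&curs->res_curs);
	}
	...
}

Tying the restart behaviour to curs->bo being NULL is subtle enough
that the explicit bool still reads better to me.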
>
> > {
> > - struct ttm_buffer_object *bo = res->bo;
> > + spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > + struct ttm_resource *res = NULL;
> > + struct ttm_buffer_object *bo;
> > + struct ttm_lru_walk_arg *arg = curs->arg;
> >
> > - if (!ttm_lru_walk_trylock(curs->arg, bo, &curs-
> > >needs_unlock))
> > - return NULL;
> > + ttm_bo_lru_cursor_cleanup_bo(curs);
> >
> > - if (!ttm_bo_get_unless_zero(bo)) {
> > - if (curs->needs_unlock)
> > - dma_resv_unlock(bo->base.resv);
> > - return NULL;
> > + spin_lock(lru_lock);
> > + for (;;) {
> > + int mem_type, ret;
> > + bool bo_locked = false;
> > +
> > + if (first) {
> > + res = ttm_resource_manager_first(&curs-
> > >res_curs);
> > + first = false;
> > + } else {
> > + res = ttm_resource_manager_next(&curs-
> > >res_curs);
> > + }
> > + if (!res)
> > + break;
> > +
> > + bo = res->bo;
> > + if (ttm_lru_walk_trylock(arg, bo, &curs-
> > >needs_unlock))
>
> Could/should we move needs_unlock into arg as well?
They are different in that arg is per-walk state governing the whole
loop, whereas needs_unlock is local to a single iteration. Now that
walk_for_evict() also uses the cursor, we could of course pass the
cursor to the various locking functions and eliminate one argument
that way.
Thoughts?
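E.g. something like this untested sketch, where the helpers take the
cursor and set curs->needs_unlock themselves (the signatures below are
invented for illustration only):

static bool ttm_lru_walk_trylock(struct ttm_bo_lru_cursor *curs,
				 struct ttm_buffer_object *bo);
static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs,
				   struct ttm_buffer_object *bo);

with the call sites in __ttm_bo_lru_cursor_next() becoming:

	if (ttm_lru_walk_trylock(curs, bo))
		bo_locked = true;
	else if (!curs->arg->ticket || curs->arg->ctx->no_wait_gpu ||
		 curs->arg->trylock_only)
		continue;
	...
	if (!bo_locked)
		ret = ttm_lru_walk_ticketlock(curs, bo);

That keeps arg as per-walk state while hiding the per-iteration
needs_unlock bookkeeping inside the locking helpers.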
/Thomas
>
> Apart from that looks good to me.
>
> Regards,
> Christian.
>
> > + bo_locked = true;
> > + else if (!arg->ticket || arg->ctx->no_wait_gpu ||
> > arg->trylock_only)
> > + continue;
> > +
> > + if (!ttm_bo_get_unless_zero(bo)) {
> > + if (curs->needs_unlock)
> > + dma_resv_unlock(bo->base.resv);
> > + continue;
> > + }
> > +
> > + mem_type = res->mem_type;
> > + spin_unlock(lru_lock);
> > + if (!bo_locked)
> > + ret = ttm_lru_walk_ticketlock(arg, bo,
> > &curs->needs_unlock);
> > + /*
> > + * Note that in between the release of the lru
> > lock and the
> > + * ticketlock, the bo may have switched resource,
> > + * and also memory type, since the resource may
> > have been
> > + * freed and allocated again with a different
> > memory type.
> > + * In that case, just skip it.
> > + */
> > + curs->bo = bo;
> > + if (!ret && bo->resource && bo->resource->mem_type
> > == mem_type)
> > + return bo;
> > +
> > + ttm_bo_lru_cursor_cleanup_bo(curs);
> > + if (ret)
> > + return ERR_PTR(ret);
> > +
> > + spin_lock(lru_lock);
> > }
> >
> > - curs->bo = bo;
> > - return bo;
> > + spin_unlock(lru_lock);
> > + return res ? bo : NULL;
> > }
> >
> > /**
> > @@ -1007,25 +999,7 @@ ttm_bo_from_res_reserved(struct ttm_resource
> > *res, struct ttm_bo_lru_cursor *cur
> > */
> > struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct
> > ttm_bo_lru_cursor *curs)
> > {
> > - spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > - struct ttm_resource *res = NULL;
> > - struct ttm_buffer_object *bo;
> > -
> > - ttm_bo_lru_cursor_cleanup_bo(curs);
> > -
> > - spin_lock(lru_lock);
> > - for (;;) {
> > - res = ttm_resource_manager_next(&curs->res_curs);
> > - if (!res)
> > - break;
> > -
> > - bo = ttm_bo_from_res_reserved(res, curs);
> > - if (bo)
> > - break;
> > - }
> > -
> > - spin_unlock(lru_lock);
> > - return res ? bo : NULL;
> > + return __ttm_bo_lru_cursor_next(curs, false);
> > }
> > EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
> >
> > @@ -1039,21 +1013,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
> > */
> > struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct
> > ttm_bo_lru_cursor *curs)
> > {
> > - spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > - struct ttm_buffer_object *bo;
> > - struct ttm_resource *res;
> > -
> > - spin_lock(lru_lock);
> > - res = ttm_resource_manager_first(&curs->res_curs);
> > - if (!res) {
> > - spin_unlock(lru_lock);
> > - return NULL;
> > - }
> > -
> > - bo = ttm_bo_from_res_reserved(res, curs);
> > - spin_unlock(lru_lock);
> > -
> > - return bo ? bo : ttm_bo_lru_cursor_next(curs);
> > + return __ttm_bo_lru_cursor_next(curs, true);
> > }
> > EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
> >
> > diff --git a/drivers/gpu/drm/xe/xe_shrinker.c
> > b/drivers/gpu/drm/xe/xe_shrinker.c
> > index f8a1129da2c3..1c3c04d52f55 100644
> > --- a/drivers/gpu/drm/xe/xe_shrinker.c
> > +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> > @@ -66,7 +66,10 @@ static s64 xe_shrinker_walk(struct xe_device
> > *xe,
> > struct ttm_resource_manager *man =
> > ttm_manager_type(&xe->ttm, mem_type);
> > struct ttm_bo_lru_cursor curs;
> > struct ttm_buffer_object *ttm_bo;
> > - struct ttm_lru_walk_arg arg = {.ctx = ctx};
> > + struct ttm_lru_walk_arg arg = {
> > + .ctx = ctx,
> > + .trylock_only = true,
> > + };
> >
> > if (!man || !man->use_tt)
> > continue;
> > @@ -83,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
> > if (*scanned >= to_scan)
> > break;
> > }
> > + /* Trylocks should never error, just fail. */
> > + xe_assert(xe, !IS_ERR(ttm_bo));
> > }
> >
> > return freed;
> > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> > index 8f04fa48b332..d3a85d76aaff 100644
> > --- a/include/drm/ttm/ttm_bo.h
> > +++ b/include/drm/ttm/ttm_bo.h
> > @@ -529,10 +529,15 @@
> > class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> > * up at looping termination, even if terminated prematurely by,
> > for
> > * example a return or break statement. Exiting the loop will also
> > unlock
> > * (if needed) and unreference @_bo.
> > + *
> > + * Return: If locking of a bo returns an error, then iteration is
> > terminated
> > + * and @_bo is set to a corresponding error pointer. It's illegal
> > to
> > + * dereference @_bo after loop exit.
> > */
> > #define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg,
> > _bo) \
> > scoped_guard(ttm_bo_lru_cursor, _cursor, _man,
> > _arg) \
> > - for ((_bo) = ttm_bo_lru_cursor_first(_cursor);
> > (_bo); \
> > - (_bo) = ttm_bo_lru_cursor_next(_cursor))
> > + for ((_bo) =
> > ttm_bo_lru_cursor_first(_cursor); \
> > +
> > !IS_ERR_OR_NULL(_bo); \
> > + (_bo) = ttm_bo_lru_cursor_next(_cursor))
> >
> > #endif
>
* Re: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
2025-06-13 18:39 ` kernel test robot
2025-06-16 13:23 ` Christian König
@ 2025-06-18 17:43 ` Dan Carpenter
2 siblings, 0 replies; 10+ messages in thread
From: Dan Carpenter @ 2025-06-18 17:43 UTC (permalink / raw)
To: oe-kbuild, Thomas Hellström, intel-xe
Cc: lkp, oe-kbuild-all, Thomas Hellström, dri-devel, airlied,
Matthew Brost, Matthew Auld, Christian König
Hi Thomas,
kernel test robot noticed the following build warnings:
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Use-a-struct-for-the-common-part-of-struct-ttm_lru_walk-and-struct-ttm_bo_lru_cursor/20250613-232106
base: https://gitlab.freedesktop.org/drm/xe/kernel.git drm-xe-next
patch link: https://lore.kernel.org/r/20250613151824.178650-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration
config: i386-randconfig-141-20250614 (https://download.01.org/0day-ci/archive/20250614/202506141727.FtEuY8xN-lkp@intel.com/config)
compiler: clang version 20.1.2 (https://github.com/llvm/llvm-project 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202506141727.FtEuY8xN-lkp@intel.com/
smatch warnings:
drivers/gpu/drm/ttm/ttm_bo_util.c:975 __ttm_bo_lru_cursor_next() error: uninitialized symbol 'ret'.
vim +/ret +975 drivers/gpu/drm/ttm/ttm_bo_util.c
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 927 static struct ttm_buffer_object *
a9654c8f32d9f4 Thomas Hellström 2025-06-13 928 __ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs, bool first)
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 929 {
a9654c8f32d9f4 Thomas Hellström 2025-06-13 930 spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 931 struct ttm_resource *res = NULL;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 932 struct ttm_buffer_object *bo;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 933 struct ttm_lru_walk_arg *arg = curs->arg;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 934
a9654c8f32d9f4 Thomas Hellström 2025-06-13 935 ttm_bo_lru_cursor_cleanup_bo(curs);
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 936
a9654c8f32d9f4 Thomas Hellström 2025-06-13 937 spin_lock(lru_lock);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 938 for (;;) {
a9654c8f32d9f4 Thomas Hellström 2025-06-13 939 int mem_type, ret;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 940 bool bo_locked = false;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 941
a9654c8f32d9f4 Thomas Hellström 2025-06-13 942 if (first) {
a9654c8f32d9f4 Thomas Hellström 2025-06-13 943 res = ttm_resource_manager_first(&curs->res_curs);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 944 first = false;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 945 } else {
a9654c8f32d9f4 Thomas Hellström 2025-06-13 946 res = ttm_resource_manager_next(&curs->res_curs);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 947 }
a9654c8f32d9f4 Thomas Hellström 2025-06-13 948 if (!res)
a9654c8f32d9f4 Thomas Hellström 2025-06-13 949 break;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 950
a9654c8f32d9f4 Thomas Hellström 2025-06-13 951 bo = res->bo;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 952 if (ttm_lru_walk_trylock(arg, bo, &curs->needs_unlock))
a9654c8f32d9f4 Thomas Hellström 2025-06-13 953 bo_locked = true;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 954 else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
a9654c8f32d9f4 Thomas Hellström 2025-06-13 955 continue;
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 956
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 957 if (!ttm_bo_get_unless_zero(bo)) {
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 958 if (curs->needs_unlock)
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 959 dma_resv_unlock(bo->base.resv);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 960 continue;
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 961 }
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 962
a9654c8f32d9f4 Thomas Hellström 2025-06-13 963 mem_type = res->mem_type;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 964 spin_unlock(lru_lock);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 965 if (!bo_locked)
a9654c8f32d9f4 Thomas Hellström 2025-06-13 966 ret = ttm_lru_walk_ticketlock(arg, bo, &curs->needs_unlock);
ret is uninitialized on else path
a9654c8f32d9f4 Thomas Hellström 2025-06-13 967 /*
a9654c8f32d9f4 Thomas Hellström 2025-06-13 968 * Note that in between the release of the lru lock and the
a9654c8f32d9f4 Thomas Hellström 2025-06-13 969 * ticketlock, the bo may have switched resource,
a9654c8f32d9f4 Thomas Hellström 2025-06-13 970 * and also memory type, since the resource may have been
a9654c8f32d9f4 Thomas Hellström 2025-06-13 971 * freed and allocated again with a different memory type.
a9654c8f32d9f4 Thomas Hellström 2025-06-13 972 * In that case, just skip it.
a9654c8f32d9f4 Thomas Hellström 2025-06-13 973 */
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 974 curs->bo = bo;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 @975 if (!ret && bo->resource && bo->resource->mem_type == mem_type)
^^^^
warning
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 976 return bo;
a9654c8f32d9f4 Thomas Hellström 2025-06-13 977
a9654c8f32d9f4 Thomas Hellström 2025-06-13 978 ttm_bo_lru_cursor_cleanup_bo(curs);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 979 if (ret)
a9654c8f32d9f4 Thomas Hellström 2025-06-13 980 return ERR_PTR(ret);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 981
a9654c8f32d9f4 Thomas Hellström 2025-06-13 982 spin_lock(lru_lock);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 983 }
a9654c8f32d9f4 Thomas Hellström 2025-06-13 984
a9654c8f32d9f4 Thomas Hellström 2025-06-13 985 spin_unlock(lru_lock);
a9654c8f32d9f4 Thomas Hellström 2025-06-13 986 return res ? bo : NULL;
f3bcfd04a52fb1 Thomas Hellström 2025-03-05 987 }
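A minimal sketch of one way to address this (not a posted fix) is to
zero-initialize ret, mirroring the pre-patch code that set lret = 0
before the conditional ticketlock, so that an already-trylocked bo is
treated as success:

-		int mem_type, ret;
+		int mem_type, ret = 0;
 		bool bo_locked = false;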
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Thread overview: 10+ messages
2025-06-13 15:18 [PATCH 0/3] drm/ttm, drm/xe: Consolidate the Buffer Object LRU walks Thomas Hellström
2025-06-13 15:18 ` [PATCH 1/3] drm/ttm: Use a struct for the common part of struct ttm_lru_walk and struct ttm_bo_lru_cursor Thomas Hellström
2025-06-16 13:04 ` Christian König
2025-06-13 15:18 ` [PATCH 2/3] drm/ttm, drm/xe: Modify the struct ttm_bo_lru_walk_cursor initialization Thomas Hellström
2025-06-16 13:15 ` Christian König
2025-06-13 15:18 ` [PATCH 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration Thomas Hellström
2025-06-13 18:39 ` kernel test robot
2025-06-16 13:23 ` Christian König
2025-06-16 15:29 ` Thomas Hellström
2025-06-18 17:43 ` Dan Carpenter