All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [CI v4 07/21] drm/ttm: sleeping evict lock.
Date: Fri, 17 May 2024 09:41:16 +0200	[thread overview]
Message-ID: <20240517074130.2908-8-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20240517074130.2908-1-thomas.hellstrom@linux.intel.com>

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c       | 340 +++++++++++------------------
 drivers/gpu/drm/ttm/ttm_device.c   |   1 -
 drivers/gpu/drm/ttm/ttm_resource.c |  20 +-
 include/drm/ttm/ttm_bo.h           |   8 +-
 4 files changed, 131 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4dbd53852773..56bae5c68d85 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 	dma_resv_iter_end(&cursor);
 }
 
-/**
- * ttm_bo_cleanup_refs
- * If bo idle, remove from lru lists, and unref.
- * If not idle, block if possible.
- *
- * Must be called with lru_lock and reservation held, this function
- * will drop the lru lock and optionally the reservation lock before returning.
- *
- * @bo:                    The buffer object to clean-up
- * @interruptible:         Any sleeps should occur interruptibly.
- * @no_wait_gpu:           Never wait for gpu. Return -EBUSY instead.
- * @unlock_resv:           Unlock the reservation lock as well.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-			       bool interruptible, bool no_wait_gpu,
-			       bool unlock_resv)
-{
-	struct dma_resv *resv = &bo->base._resv;
-	int ret;
-
-	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
-		ret = 0;
-	else
-		ret = -EBUSY;
-
-	if (ret && !no_wait_gpu) {
-		long lret;
-
-		if (unlock_resv)
-			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&bo->bdev->lru_lock);
-
-		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
-					     interruptible,
-					     30 * HZ);
-
-		if (lret < 0)
-			return lret;
-		else if (lret == 0)
-			return -EBUSY;
-
-		spin_lock(&bo->bdev->lru_lock);
-		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
-			/*
-			 * We raced, and lost, someone else holds the reservation now,
-			 * and is probably busy in ttm_bo_cleanup_memtype_use.
-			 *
-			 * Even if it's not the case, because we finished waiting any
-			 * delayed destruction would succeed, so just return success
-			 * here.
-			 */
-			spin_unlock(&bo->bdev->lru_lock);
-			return 0;
-		}
-		ret = 0;
-	}
-
-	if (ret) {
-		if (unlock_resv)
-			dma_resv_unlock(bo->base.resv);
-		spin_unlock(&bo->bdev->lru_lock);
-		return ret;
-	}
-
-	spin_unlock(&bo->bdev->lru_lock);
-	ttm_bo_cleanup_memtype_use(bo);
-
-	if (unlock_resv)
-		dma_resv_unlock(bo->base.resv);
-
-	return 0;
-}
-
 /*
  * Block for the dma_resv object to become idle, lock the buffer and clean up
  * the resource and tt object.
@@ -505,151 +431,133 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
-/*
- * Check the target bo is allowable to be evicted or swapout, including cases:
- *
- * a. if share same reservation object with ctx->resv, have assumption
- * reservation objects should already be locked, so not lock again and
- * return true directly when either the opreation allow_reserved_eviction
- * or the target bo already is in delayed free list;
- *
- * b. Otherwise, trylock it.
- */
-static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-					   struct ttm_operation_ctx *ctx,
-					   const struct ttm_place *place,
-					   bool *locked, bool *busy)
+int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man,
+		       struct ttm_operation_ctx *ctx)
 {
-	bool ret = false;
+	struct ttm_resource_cursor cursor;
+	struct ttm_resource *res;
+	struct ttm_buffer_object *bo;
+	int ret = 0;
 
-	if (bo->pin_count) {
-		*locked = false;
-		if (busy)
-			*busy = false;
-		return false;
+	spin_lock(&bdev->lru_lock);
+	res = ttm_resource_manager_first(man, &cursor);
+	if (!res) {
+		ret = -ENOENT;
+		goto out_no_ref;
 	}
+	bo = res->bo;
+	if (!ttm_bo_get_unless_zero(bo))
+		goto out_no_ref;
+	spin_unlock(&bdev->lru_lock);
+	dma_resv_lock(bo->base.resv, NULL);
+	if (bo->resource != res)
+		goto out_bad_res;
 
-	if (bo->base.resv == ctx->resv) {
-		dma_resv_assert_held(bo->base.resv);
-		if (ctx->allow_res_evict)
-			ret = true;
-		*locked = false;
-		if (busy)
-			*busy = false;
+	if (bo->deleted) {
+		ret = ttm_bo_wait_ctx(bo, ctx);
+		if (!ret)
+			ttm_bo_cleanup_memtype_use(bo);
 	} else {
-		ret = dma_resv_trylock(bo->base.resv);
-		*locked = ret;
-		if (busy)
-			*busy = !ret;
-	}
-
-	if (ret && place && (bo->resource->mem_type != place->mem_type ||
-		!bo->bdev->funcs->eviction_valuable(bo, place))) {
-		ret = false;
-		if (*locked) {
-			dma_resv_unlock(bo->base.resv);
-			*locked = false;
-		}
+		ret = ttm_bo_evict(bo, ctx);
 	}
-
+out_bad_res:
+	dma_resv_unlock(bo->base.resv);
+	ttm_bo_put(bo);
+	ttm_resource_cursor_fini(&cursor);
 	return ret;
-}
-
-/**
- * ttm_mem_evict_wait_busy - wait for a busy BO to become available
- *
- * @busy_bo: BO which couldn't be locked with trylock
- * @ctx: operation context
- * @ticket: acquire ticket
- *
- * Try to lock a busy buffer object to avoid failing eviction.
- */
-static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo,
-				   struct ttm_operation_ctx *ctx,
-				   struct ww_acquire_ctx *ticket)
-{
-	int r;
-
-	if (!busy_bo || !ticket)
-		return -EBUSY;
 
-	if (ctx->interruptible)
-		r = dma_resv_lock_interruptible(busy_bo->base.resv,
-							  ticket);
-	else
-		r = dma_resv_lock(busy_bo->base.resv, ticket);
-
-	/*
-	 * TODO: It would be better to keep the BO locked until allocation is at
-	 * least tried one more time, but that would mean a much larger rework
-	 * of TTM.
-	 */
-	if (!r)
-		dma_resv_unlock(busy_bo->base.resv);
-
-	return r == -EDEADLK ? -EBUSY : r;
+out_no_ref:
+	ttm_resource_cursor_fini_locked(&cursor);
+	spin_unlock(&bdev->lru_lock);
+	return -ENOENT;
 }
 
-int ttm_mem_evict_first(struct ttm_device *bdev,
-			struct ttm_resource_manager *man,
-			const struct ttm_place *place,
-			struct ttm_operation_ctx *ctx,
-			struct ww_acquire_ctx *ticket)
+struct ttm_bo_evict_walk {
+	struct ttm_lru_walk walk;
+	const struct ttm_place *place;
+	struct ttm_buffer_object *evictor;
+	struct ttm_resource **res;
+	unsigned long evicted;
+};
+
+static long ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
-	struct ttm_resource_cursor cursor;
-	struct ttm_resource *res;
-	bool locked = false;
-	int ret;
+	struct ttm_bo_evict_walk *evict_walk =
+		container_of(walk, typeof(*evict_walk), walk);
+	long lret;
 
-	spin_lock(&bdev->lru_lock);
-	ttm_resource_manager_for_each_res(man, &cursor, res) {
-		bool busy;
-
-		if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place,
-						    &locked, &busy)) {
-			if (busy && !busy_bo && ticket !=
-			    dma_resv_locking_ctx(res->bo->base.resv))
-				busy_bo = res->bo;
-			continue;
-		}
+	if (!bo->bdev->funcs->eviction_valuable(bo, evict_walk->place))
+		return 0;
 
-		if (ttm_bo_get_unless_zero(res->bo)) {
-			bo = res->bo;
-			break;
-		}
-		if (locked)
-			dma_resv_unlock(res->bo->base.resv);
+	if (bo->deleted) {
+		lret = ttm_bo_wait_ctx(bo, walk->ctx);
+		if (!lret)
+			ttm_bo_cleanup_memtype_use(bo);
+	} else {
+		lret = ttm_bo_evict(bo, walk->ctx);
 	}
-	ttm_resource_cursor_fini_locked(&cursor);
 
-	if (!bo) {
-		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
-			busy_bo = NULL;
-		spin_unlock(&bdev->lru_lock);
-		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
-		if (busy_bo)
-			ttm_bo_put(busy_bo);
-		return ret;
-	}
+	if (lret)
+		goto out;
 
-	if (bo->deleted) {
-		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
-					  ctx->no_wait_gpu, locked);
-		ttm_bo_put(bo);
-		return ret;
-	}
+	evict_walk->evicted++;
+	if (evict_walk->res)
+		lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place,
+					  evict_walk->res);
+	if (lret == 0)
+		return 1;
+out:
+	/* Errors that should terminate the walk. */
+	if (lret == -ENOMEM || lret == -EINTR || lret == -ERESTARTSYS ||
+	    lret == -EAGAIN)
+		return lret;
 
-	spin_unlock(&bdev->lru_lock);
+	return 0;
+}
 
-	ret = ttm_bo_evict(bo, ctx);
-	if (locked)
-		ttm_bo_unreserve(bo);
-	else
-		ttm_bo_move_to_lru_tail_unlocked(bo);
+static const struct ttm_lru_walk_ops ttm_evict_walk_ops = {
+	.process_bo = ttm_bo_evict_cb,
+};
 
-	ttm_bo_put(bo);
-	return ret;
+static int ttm_bo_evict_alloc(struct ttm_device *bdev,
+			      struct ttm_resource_manager *man,
+			      const struct ttm_place *place,
+			      struct ttm_buffer_object *evictor,
+			      struct ttm_operation_ctx *ctx,
+			      struct ww_acquire_ctx *ticket,
+			      struct ttm_resource **res)
+{
+	struct ttm_bo_evict_walk evict_walk = {
+		.walk = {
+			.ops = &ttm_evict_walk_ops,
+			.ctx = ctx,
+			.ticket = ticket,
+		},
+		.place = place,
+		.evictor = evictor,
+		.res = res,
+	};
+	long lret;
+
+	evict_walk.walk.trylock_only = true;
+	lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
+	if (lret || !ticket)
+		goto out;
+
+	/* If ticket-locking, repeat while making progress. */
+	evict_walk.walk.trylock_only = false;
+	do {
+		/* The walk may clear the evict_walk.walk.ticket field */
+		evict_walk.walk.ticket = ticket;
+		evict_walk.evicted = 0;
+		lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
+	} while (!lret && evict_walk.evicted);
+out:
+	if (lret < 0)
+		return lret;
+	if (lret == 0)
+		return -EBUSY;
+	return 0;
 }
 
 /**
@@ -760,6 +668,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
 		struct ttm_resource_manager *man;
+		bool may_evict;
 
 		man = ttm_manager_type(bdev, place->mem_type);
 		if (!man || !ttm_resource_manager_used(man))
@@ -769,22 +678,21 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 				    TTM_PL_FLAG_FALLBACK))
 			continue;
 
-		do {
-			ret = ttm_resource_alloc(bo, place, res);
-			if (unlikely(ret && ret != -ENOSPC))
+		may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
+		ret = ttm_resource_alloc(bo, place, res);
+		if (ret) {
+			if (ret != -ENOSPC)
 				return ret;
-			if (likely(!ret) || !force_space)
-				break;
-
-			ret = ttm_mem_evict_first(bdev, man, place, ctx,
-						  ticket);
-			if (unlikely(ret == -EBUSY))
-				break;
-			if (unlikely(ret))
+			if (!may_evict)
+				continue;
+
+			ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx,
+						 ticket, res);
+			if (ret == -EBUSY)
+				continue;
+			if (ret)
 				return ret;
-		} while (1);
-		if (ret)
-			continue;
+		}
 
 		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
 		if (unlikely(ret)) {
@@ -796,7 +704,6 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 		}
 		return 0;
 	}
-
 	return -ENOSPC;
 }
 
@@ -822,6 +729,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	do {
 		ret = ttm_bo_alloc_resource(bo, placement, ctx,
 					    force_space, res);
+		if (ret)
+			pr_err("mem space failure %pe, force_space %d\n",
+			       ERR_PTR(ret), force_space);
 		force_space = !force_space;
 	} while (ret == -ENOSPC && force_space);
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index c9d08ecc0f3b..ee575d8a54c0 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -152,7 +152,6 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 	unsigned i;
 	long lret;
 
-	spin_lock(&bdev->lru_lock);
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 9b80bdcf2216..a68a96a8241b 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -509,24 +509,10 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	};
 	struct dma_fence *fence;
 	int ret;
-	unsigned i;
-
-	/*
-	 * Can't use standard list traversal since we're unlocking.
-	 */
 
-	spin_lock(&bdev->lru_lock);
-	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-		while (!list_empty(&man->lru[i])) {
-			spin_unlock(&bdev->lru_lock);
-			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
-						  NULL);
-			if (ret)
-				return ret;
-			spin_lock(&bdev->lru_lock);
-		}
-	}
-	spin_unlock(&bdev->lru_lock);
+	do {
+		ret = ttm_bo_evict_first(bdev, man, &ctx);
+	} while (!ret);
 
 	spin_lock(&man->move_lock);
 	fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8c43939e95e9..30c3a5fd9099 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -415,11 +415,9 @@ long ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 		    pgoff_t target);
 void ttm_bo_pin(struct ttm_buffer_object *bo);
 void ttm_bo_unpin(struct ttm_buffer_object *bo);
-int ttm_mem_evict_first(struct ttm_device *bdev,
-			struct ttm_resource_manager *man,
-			const struct ttm_place *place,
-			struct ttm_operation_ctx *ctx,
-			struct ww_acquire_ctx *ticket);
+int ttm_bo_evict_first(struct ttm_device *bdev,
+		       struct ttm_resource_manager *man,
+		       struct ttm_operation_ctx *ctx);
 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
 			     struct vm_fault *vmf);
 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
-- 
2.44.0


  parent reply	other threads:[~2024-05-17  7:41 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-17  7:41 [CI v4 00/21] xe bo shrinker and exhaustive eviction Thomas Hellström
2024-05-17  7:41 ` [CI v4 01/21] drm/ttm: Allow TTM LRU list nodes of different types Thomas Hellström
2024-05-17  7:41 ` [CI v4 02/21] drm/ttm: Slightly clean up LRU list iteration Thomas Hellström
2024-05-17  7:41 ` [CI v4 03/21] drm/ttm: Use LRU hitches Thomas Hellström
2024-05-17  7:41 ` [CI v4 04/21] drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves Thomas Hellström
2024-05-17  7:41 ` [CI v4 05/21] drm/ttm: Provide a generic LRU walker helper Thomas Hellström
2024-05-17  7:41 ` [CI v4 06/21] drm/ttm: Use restartable LRU and sleeping locks for swapping Thomas Hellström
2024-05-17  7:41 ` Thomas Hellström [this message]
2024-05-17  7:41 ` [CI v4 08/21] drm/ttm: Add a virtual base class for graphics memory backup Thomas Hellström
2024-05-17  7:41 ` [CI v4 09/21] drm/ttm/pool: Provide a helper to shrink pages Thomas Hellström
2024-05-17  7:41 ` [CI v4 10/21] drm/ttm: Use fault-injection to test error paths Thomas Hellström
2024-05-17  7:41 ` [CI v4 11/21] drm/ttm, drm/xe: Add a shrinker for xe bos Thomas Hellström
2024-05-17  7:41 ` [CI v4 12/21] dma-buf/dma-resv: Introduce dma_resv_trylock_ctx() Thomas Hellström
2024-05-17  7:41 ` [CI v4 13/21] drm/exec: Rework contended locking Thomas Hellström
2024-05-17  7:41 ` [CI v4 14/21] drm/exec: drm_exec_trylock() Thomas Hellström
2024-05-17  7:41 ` [CI v4 15/21] drm/exec: Add a snapshot capability Thomas Hellström
2024-05-17  7:41 ` [CI v4 16/21] drm/exec: Introduce an evict mode Thomas Hellström
2024-05-17  7:41 ` [CI v4 17/21] drm/ttm: Support drm_exec locking for eviction and swapping Thomas Hellström
2024-05-17  7:41 ` [CI v4 18/21] drm/ttm: Convert ttm vm to using drm_exec Thomas Hellström
2024-05-17  7:41 ` [CI v4 19/21] drm/xe: Use drm_exec for fault locking Thomas Hellström
2024-05-17  7:41 ` [CI v4 20/21] drm/ttm: Use drm_exec_trylock for bo initialization Thomas Hellström
2024-05-17  7:41 ` [CI v4 21/21] drm/xe: Initial support for drm exec locking during validate Thomas Hellström
2024-05-17  7:47 ` ✓ CI.Patch_applied: success for xe bo shrinker and exhaustive eviction (rev5) Patchwork
2024-05-17  7:48 ` ✗ CI.checkpatch: warning " Patchwork
2024-05-17  7:49 ` ✓ CI.KUnit: success " Patchwork
2024-05-17  8:01 ` ✓ CI.Build: " Patchwork
2024-05-17  8:03 ` ✗ CI.Hooks: failure " Patchwork
2024-05-17  8:05 ` ✗ CI.checksparse: warning " Patchwork
2024-05-17  8:27 ` ✓ CI.BAT: success " Patchwork
2024-05-17  9:52 ` ✓ CI.FULL: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240517074130.2908-8-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.