Linux cgroups development
 help / color / mirror / Atom feed
From: Natalie Vock <natalie.vock@gmx.de>
To: "Maarten Lankhorst" <dev@lankhorst.se>,
	"Maxime Ripard" <mripard@kernel.org>, "Tejun Heo" <tj@kernel.org>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Christian Koenig" <christian.koenig@amd.com>,
	"Huang Rui" <ray.huang@amd.com>,
	"Matthew Auld" <matthew.auld@intel.com>,
	"Matthew Brost" <matthew.brost@intel.com>,
	"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
	"Thomas Zimmermann" <tzimmermann@suse.de>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>
Cc: cgroups@vger.kernel.org, dri-devel@lists.freedesktop.org
Subject: [PATCH 3/4] drm/ttm: Be more aggressive when allocating below protection limit
Date: Mon, 15 Sep 2025 14:36:30 +0200	[thread overview]
Message-ID: <20250915-dmemcg-aggressive-protect-v1-3-2f3353bfcdac@gmx.de> (raw)
In-Reply-To: <20250915-dmemcg-aggressive-protect-v1-0-2f3353bfcdac@gmx.de>

When a cgroup's memory usage is below its low/min limit and an allocation
fails, try evicting some unprotected buffers to make space. Otherwise, if
other applications filled VRAM with their allocations first, application
buffers may be forced into GTT even though the cgroup's usage is below the
corresponding low/min limit.

Signed-off-by: Natalie Vock <natalie.vock@gmx.de>
---
 drivers/gpu/drm/ttm/ttm_bo.c       | 57 ++++++++++++++++++++++++++++++--------
 drivers/gpu/drm/ttm/ttm_resource.c | 48 +++++++++++++++++++++++---------
 include/drm/ttm/ttm_resource.h     |  6 +++-
 3 files changed, 86 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e70cb25d0db5e79391233e1dc72221..d20ff41411c08cd97b4467f603751f483d1c7ff4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -504,6 +504,8 @@ struct ttm_bo_evict_walk {
 	/** @evicted: Number of successful evictions. */
 	unsigned long evicted;
 
+	/** @charge_pool: The memory pool the resource is charged to */
+	struct dmem_cgroup_pool_state *charge_pool;
 	/** @limit_pool: Which pool limit we should test against */
 	struct dmem_cgroup_pool_state *limit_pool;
 	/** @try_low: Whether we should attempt to evict BO's with low watermark threshold */
@@ -539,7 +541,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
 	evict_walk->evicted++;
 	if (evict_walk->res)
 		lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place,
-					  evict_walk->res, NULL);
+					  evict_walk->res, evict_walk->charge_pool);
 	if (lret == 0)
 		return 1;
 out:
@@ -561,6 +563,8 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
 			      struct ttm_operation_ctx *ctx,
 			      struct ww_acquire_ctx *ticket,
 			      struct ttm_resource **res,
+			      bool only_evict_unprotected,
+			      struct dmem_cgroup_pool_state *charge_pool,
 			      struct dmem_cgroup_pool_state *limit_pool)
 {
 	struct ttm_bo_evict_walk evict_walk = {
@@ -574,6 +578,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
 		.place = place,
 		.evictor = evictor,
 		.res = res,
+		.charge_pool = charge_pool,
 		.limit_pool = limit_pool,
 	};
 	s64 lret;
@@ -582,7 +587,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
 	lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
 
 	/* One more attempt if we hit low limit? */
-	if (!lret && evict_walk.hit_low) {
+	if (!lret && evict_walk.hit_low && !only_evict_unprotected) {
 		evict_walk.try_low = true;
 		lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1);
 	}
@@ -603,7 +608,8 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
 	} while (!lret && evict_walk.evicted);
 
 	/* We hit the low limit? Try once more */
-	if (!lret && evict_walk.hit_low && !evict_walk.try_low) {
+	if (!lret && evict_walk.hit_low && !evict_walk.try_low &&
+			!only_evict_unprotected) {
 		evict_walk.try_low = true;
 		goto retry;
 	}
@@ -724,9 +730,9 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
-		struct dmem_cgroup_pool_state *limit_pool = NULL;
+		struct dmem_cgroup_pool_state *limit_pool = NULL, *charge_pool = NULL;
 		struct ttm_resource_manager *man;
-		bool may_evict;
+		bool may_evict, is_protected = false;
 
 		man = ttm_manager_type(bdev, place->mem_type);
 		if (!man || !ttm_resource_manager_used(man))
@@ -737,24 +743,53 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 			continue;
 
 		may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
-		ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL);
+		ret = ttm_resource_try_charge(bo, place, &charge_pool,
+					      force_space ? &limit_pool : NULL);
+		if (ret) {
+			if (ret != -EAGAIN) {
+				dmem_cgroup_pool_state_put(limit_pool);
+				return ret;
+			} else if (!may_evict) {
+				dmem_cgroup_pool_state_put(limit_pool);
+				continue;
+			}
+		} else {
+			is_protected = dmem_cgroup_below_min(NULL, charge_pool) ||
+				       dmem_cgroup_below_low(NULL, charge_pool);
+			ret = ttm_resource_alloc(bo, place, res, charge_pool);
+		}
+
 		if (ret) {
 			if (ret != -ENOSPC && ret != -EAGAIN) {
 				dmem_cgroup_pool_state_put(limit_pool);
+				if (charge_pool) {
+					dmem_cgroup_uncharge(charge_pool, bo->base.size);
+					dmem_cgroup_pool_state_put(charge_pool);
+				}
 				return ret;
 			}
-			if (!may_evict) {
+			if (!may_evict && !is_protected) {
 				dmem_cgroup_pool_state_put(limit_pool);
+				if (charge_pool) {
+					dmem_cgroup_uncharge(charge_pool, bo->base.size);
+					dmem_cgroup_pool_state_put(charge_pool);
+				}
 				continue;
 			}
 
 			ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx,
-						 ticket, res, limit_pool);
+						 ticket, res, !may_evict && is_protected,
+						 charge_pool, limit_pool);
 			dmem_cgroup_pool_state_put(limit_pool);
-			if (ret == -EBUSY)
-				continue;
-			if (ret)
+			if (ret) {
+				if (charge_pool) {
+					dmem_cgroup_uncharge(charge_pool, bo->base.size);
+					dmem_cgroup_pool_state_put(charge_pool);
+				}
+				if (ret == -EBUSY)
+					continue;
 				return ret;
+			}
 		}
 
 		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb44b5e88bf5b5db1ef54dd6d27823b..fcfa8b51b033745f46a01e40a9dc83e0c69165fc 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -372,30 +372,52 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
 }
 EXPORT_SYMBOL(ttm_resource_fini);
 
+/**
+ * ttm_resource_try_charge - charge a resource manager's cgroup pool
+ * @bo: buffer for which an allocation should be charged
+ * @place: where the allocation is attempted to be placed
+ * @ret_pool: on success, the pool that was charged (NULL if nothing charged)
+ * @ret_limit_pool: optional (may be NULL); on failure, the pool responsible
+ *
+ * Charge bo->base.size to the manager's cgroup before attempting resource
+ * allocation; on success pass the value of @ret_pool to ttm_resource_alloc().
+ * A manager without a cgroup (man->cg unset) is not charged: 0 is returned.
+ *
+ * Returns: 0 on charge success, negative errno on failure.
+ */
+int ttm_resource_try_charge(struct ttm_buffer_object *bo,
+			    const struct ttm_place *place,
+			    struct dmem_cgroup_pool_state **ret_pool,
+			    struct dmem_cgroup_pool_state **ret_limit_pool)
+{
+	struct ttm_resource_manager *man =
+		ttm_manager_type(bo->bdev, place->mem_type);
+
+	if (!man->cg) {
+		*ret_pool = NULL;
+		if (ret_limit_pool)
+			*ret_limit_pool = NULL;
+		return 0;
+	}
+
+	return dmem_cgroup_try_charge(man->cg, bo->base.size, ret_pool,
+				      ret_limit_pool);
+}
+
 int ttm_resource_alloc(struct ttm_buffer_object *bo,
 		       const struct ttm_place *place,
 		       struct ttm_resource **res_ptr,
-		       struct dmem_cgroup_pool_state **ret_limit_pool)
+		       struct dmem_cgroup_pool_state *charge_pool)
 {
 	struct ttm_resource_manager *man =
 		ttm_manager_type(bo->bdev, place->mem_type);
-	struct dmem_cgroup_pool_state *pool = NULL;
 	int ret;
 
-	if (man->cg) {
-		ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool);
-		if (ret)
-			return ret;
-	}
-
 	ret = man->func->alloc(man, bo, place, res_ptr);
-	if (ret) {
-		if (pool)
-			dmem_cgroup_uncharge(pool, bo->base.size);
+	if (ret)
 		return ret;
-	}
 
-	(*res_ptr)->css = pool;
+	(*res_ptr)->css = charge_pool;
 
 	spin_lock(&bo->bdev->lru_lock);
 	ttm_resource_add_bulk_move(*res_ptr, bo);
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index e52bba15012f78e352f392232ac2e89a83afd311..3aef7efdd7cfb8fd93071db85e632b975b53cf81 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -442,10 +442,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
 void ttm_resource_fini(struct ttm_resource_manager *man,
 		       struct ttm_resource *res);
 
+int ttm_resource_try_charge(struct ttm_buffer_object *bo,
+			    const struct ttm_place *place,
+			    struct dmem_cgroup_pool_state **ret_pool,
+			    struct dmem_cgroup_pool_state **ret_limit_pool);
 int ttm_resource_alloc(struct ttm_buffer_object *bo,
 		       const struct ttm_place *place,
 		       struct ttm_resource **res,
-		       struct dmem_cgroup_pool_state **ret_limit_pool);
+		       struct dmem_cgroup_pool_state *charge_pool);
 void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res);
 bool ttm_resource_intersects(struct ttm_device *bdev,
 			     struct ttm_resource *res,

-- 
2.51.0


  parent reply	other threads:[~2025-09-15 12:37 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-15 12:36 [PATCH 0/4] cgroup/dmem,drm/ttm: Improve protection in contended cases Natalie Vock
2025-09-15 12:36 ` [PATCH 1/4] cgroup/dmem: Add queries for protection values Natalie Vock
2025-09-15 12:36 ` [PATCH 2/4] cgroup/dmem: Add dmem_cgroup_common_ancestor helper Natalie Vock
2025-09-15 12:36 ` Natalie Vock [this message]
2025-09-15 12:43   ` [PATCH 3/4] drm/ttm: Be more aggressive when allocating below protection limit Christian König
2025-09-15 12:36 ` [PATCH 4/4] drm/ttm: Use common ancestor of evictor and evictee as limit pool Natalie Vock
2025-09-15 12:48 ` [PATCH 0/4] cgroup/dmem,drm/ttm: Improve protection in contended cases Christian König
2025-09-15 13:17   ` [PATCH 0/4] cgroup/dmem, drm/ttm: " Natalie Vock
2025-09-15 13:23     ` Christian König
2025-09-15 13:44       ` Natalie Vock

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250915-dmemcg-aggressive-protect-v1-3-2f3353bfcdac@gmx.de \
    --to=natalie.vock@gmx.de \
    --cc=airlied@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dev@lankhorst.se \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hannes@cmpxchg.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=matthew.auld@intel.com \
    --cc=matthew.brost@intel.com \
    --cc=mkoutny@suse.com \
    --cc=mripard@kernel.org \
    --cc=ray.huang@amd.com \
    --cc=simona@ffwll.ch \
    --cc=tj@kernel.org \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.