cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Airlie <airlied@gmail.com>
To: dri-devel@lists.freedesktop.org, tj@kernel.org,
	christian.koenig@amd.com, Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	Muchun Song <muchun.song@linux.dev>
Cc: cgroups@vger.kernel.org, Waiman Long <longman@redhat.com>,
	simona@ffwll.ch
Subject: [PATCH 3/5] ttm: add initial memcg integration. (v2)
Date: Fri,  2 May 2025 13:36:02 +1000	[thread overview]
Message-ID: <20250502034046.1625896-4-airlied@gmail.com> (raw)
In-Reply-To: <20250502034046.1625896-1-airlied@gmail.com>

From: Dave Airlie <airlied@redhat.com>

Doing proper integration of TTM system memory allocations with
memcg is a difficult ask, primarily due to difficulties around
accounting for evictions properly.

However there are systems where userspace will be allocating
objects in system memory and they won't be prone to migrating
or evicting and we should start with at least accounting those.

This adds a memcg group to ttm bo and resource objects.

This memcg is used when:
a) resource is allocated in system/tt memory
b) the account_op is set in the operation context

This patch disables the flag around object evictions,
but any operation that could populate a TTM tt object in process context
should set the account_op flag.

This v2 moves the charging up a level and also no longer uses
__GFP_ACCOUNT, or attaches the memcg to object pages, it instead
uses the same approach as socket memory and just charges/uncharges
at the object level. This was suggested by Christian.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c       |  9 +++++++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c    |  3 ++-
 drivers/gpu/drm/ttm/ttm_resource.c | 20 ++++++++++++++++++++
 include/drm/ttm/ttm_bo.h           |  8 ++++++++
 include/drm/ttm/ttm_resource.h     |  6 +++++-
 5 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 95b86003c50d..89d2df246ed2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -537,6 +537,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
 	evict_walk->evicted++;
 	if (evict_walk->res)
 		lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place,
+					  walk->ctx,
 					  evict_walk->res, NULL);
 	if (lret == 0)
 		return 1;
@@ -733,7 +734,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 			continue;
 
 		may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
-		ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL);
+		ret = ttm_resource_alloc(bo, place, ctx, res, force_space ? &limit_pool : NULL);
 		if (ret) {
 			if (ret != -ENOSPC && ret != -EAGAIN) {
 				dmem_cgroup_pool_state_put(limit_pool);
@@ -744,8 +745,12 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 				continue;
 			}
 
+			/* we don't want to account evictions at this point */
+			bool old_ctx_account = ctx->account_op;
+			ctx->account_op = false;
 			ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx,
 						 ticket, res, limit_pool);
+			ctx->account_op = old_ctx_account;
 			dmem_cgroup_pool_state_put(limit_pool);
 			if (ret == -EBUSY)
 				continue;
@@ -1145,7 +1150,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 
 		memset(&hop, 0, sizeof(hop));
 		place.mem_type = TTM_PL_SYSTEM;
-		ret = ttm_resource_alloc(bo, &place, &evict_mem, NULL);
+		ret = ttm_resource_alloc(bo, &place, ctx, &evict_mem, NULL);
 		if (ret)
 			goto out;
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a194db83421d..163039cf40a5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -220,7 +220,8 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 		struct ttm_operation_ctx ctx = {
 			.interruptible = true,
 			.no_wait_gpu = false,
-			.force_alloc = true
+			.force_alloc = true,
+			.account_op = true,
 		};
 
 		ttm = bo->ttm;
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 7e5a60c55813..da257678a5ba 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -27,6 +27,7 @@
 #include <linux/iosys-map.h>
 #include <linux/scatterlist.h>
 #include <linux/cgroup_dmem.h>
+#include <linux/memcontrol.h>
 
 #include <drm/ttm/ttm_bo.h>
 #include <drm/ttm/ttm_placement.h>
@@ -373,12 +374,14 @@ EXPORT_SYMBOL(ttm_resource_fini);
 
 int ttm_resource_alloc(struct ttm_buffer_object *bo,
 		       const struct ttm_place *place,
+		       struct ttm_operation_ctx *ctx,
 		       struct ttm_resource **res_ptr,
 		       struct dmem_cgroup_pool_state **ret_limit_pool)
 {
 	struct ttm_resource_manager *man =
 		ttm_manager_type(bo->bdev, place->mem_type);
 	struct dmem_cgroup_pool_state *pool = NULL;
+	struct mem_cgroup *memcg = NULL;
 	int ret;
 
 	if (man->cg) {
@@ -387,13 +390,26 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo,
 			return ret;
 	}
 
+	if ((place->mem_type == TTM_PL_SYSTEM || place->mem_type == TTM_PL_TT) &&
+	    ctx->account_op && bo->memcg) {
+		memcg = bo->memcg;
+		gfp_t gfp_flags = GFP_USER;
+		if (ctx->gfp_retry_mayfail)
+			gfp_flags |= __GFP_RETRY_MAYFAIL;
+
+		if (!mem_cgroup_charge_gpu(memcg, bo->base.size >> PAGE_SHIFT, gfp_flags))
+			return -ENOMEM;
+	}
 	ret = man->func->alloc(man, bo, place, res_ptr);
 	if (ret) {
 		if (pool)
 			dmem_cgroup_uncharge(pool, bo->base.size);
+		if (memcg)
+			mem_cgroup_uncharge_gpu(memcg, bo->base.size >> PAGE_SHIFT);
 		return ret;
 	}
 
+	(*res_ptr)->memcg = memcg;
 	(*res_ptr)->css = pool;
 
 	spin_lock(&bo->bdev->lru_lock);
@@ -407,6 +423,7 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
 {
 	struct ttm_resource_manager *man;
 	struct dmem_cgroup_pool_state *pool;
+	struct mem_cgroup *memcg;
 
 	if (!*res)
 		return;
@@ -416,11 +433,14 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
 	spin_unlock(&bo->bdev->lru_lock);
 
 	pool = (*res)->css;
+	memcg = (*res)->memcg;
 	man = ttm_manager_type(bo->bdev, (*res)->mem_type);
 	man->func->free(man, *res);
 	*res = NULL;
 	if (man->cg)
 		dmem_cgroup_uncharge(pool, bo->base.size);
+	if (memcg)
+		mem_cgroup_uncharge_gpu(memcg, bo->base.size >> PAGE_SHIFT);
 }
 EXPORT_SYMBOL(ttm_resource_free);
 
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 903cd1030110..56a33b5f5c41 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -135,6 +135,12 @@ struct ttm_buffer_object {
 	 * reservation lock.
 	 */
 	struct sg_table *sg;
+
+	/**
+	 * @memcg: memory cgroup to charge this to if it ends up using system memory.
+	 * NULL means don't charge.
+	 */
+	struct mem_cgroup *memcg;
 };
 
 #define TTM_BO_MAP_IOMEM_MASK 0x80
@@ -174,6 +180,7 @@ struct ttm_bo_kmap_obj {
  * BOs share the same reservation object.
  * @force_alloc: Don't check the memory account during suspend or CPU page
  * faults. Should only be used by TTM internally.
+ * @account_op: account for any memory allocations by a bo with an memcg.
  * @resv: Reservation object to allow reserved evictions with.
  * @bytes_moved: Statistics on how many bytes have been moved.
  *
@@ -186,6 +193,7 @@ struct ttm_operation_ctx {
 	bool gfp_retry_mayfail;
 	bool allow_res_evict;
 	bool force_alloc;
+	bool account_op;
 	struct dma_resv *resv;
 	uint64_t bytes_moved;
 };
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index e52bba15012f..1ab515c6ec00 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -45,6 +45,7 @@ struct ttm_resource;
 struct ttm_place;
 struct ttm_buffer_object;
 struct ttm_placement;
+struct ttm_operation_ctx;
 struct iosys_map;
 struct io_mapping;
 struct sg_table;
@@ -245,7 +246,8 @@ struct ttm_bus_placement {
  * @placement: Placement flags.
  * @bus: Placement on io bus accessible to the CPU
  * @bo: weak reference to the BO, protected by ttm_device::lru_lock
- * @css: cgroup state this resource is charged to
+ * @css: cgroup state this resource is charged to for dmem
+ * @memcg: memory cgroup this resource is charged to for sysmem
  *
  * Structure indicating the placement and space resources used by a
  * buffer object.
@@ -264,6 +266,7 @@ struct ttm_resource {
 	 * @lru: Least recently used list, see &ttm_resource_manager.lru
 	 */
 	struct ttm_lru_item lru;
+	struct mem_cgroup *memcg;
 };
 
 /**
@@ -444,6 +447,7 @@ void ttm_resource_fini(struct ttm_resource_manager *man,
 
 int ttm_resource_alloc(struct ttm_buffer_object *bo,
 		       const struct ttm_place *place,
+		       struct ttm_operation_ctx *ctx,
 		       struct ttm_resource **res,
 		       struct dmem_cgroup_pool_state **ret_limit_pool);
 void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res);
-- 
2.49.0


  parent reply	other threads:[~2025-05-02  3:41 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-02  3:35 [rfc] drm/ttm/memcg: simplest initial memcg/ttm integration (v2) Dave Airlie
2025-05-02  3:36 ` [PATCH 1/5] memcg: add GPU statistic Dave Airlie
2025-05-02  3:36 ` [PATCH 2/5] memcg: add hooks for gpu memcg charging/uncharging Dave Airlie
2025-05-02  3:36 ` Dave Airlie [this message]
2025-05-02 12:01   ` [PATCH 3/5] ttm: add initial memcg integration. (v2) Christian König
2025-05-02 14:24   ` kernel test robot
2025-05-03  2:09   ` kernel test robot
2025-05-02  3:36 ` [PATCH 4/5] amdgpu: add support for memcg integration Dave Airlie
2025-05-02 14:01   ` Waiman Long
2025-05-02  3:36 ` [PATCH 5/5] nouveau: add " Dave Airlie
2025-05-06  0:37 ` [rfc] drm/ttm/memcg: simplest initial memcg/ttm integration (v2) Shakeel Butt
2025-05-06  0:59   ` Dave Airlie
2025-05-07 17:52 ` Johannes Weiner
2025-05-07 22:03   ` Dave Airlie
2025-05-07 22:11     ` Dave Airlie
2025-05-13  7:54     ` Johannes Weiner
2025-05-15  3:02       ` Dave Airlie
2025-05-15  8:55         ` Christian König
2025-05-15 15:04           ` Waiman Long
2025-05-15 15:16             ` Christian König
2025-05-15 16:08           ` Johannes Weiner
2025-05-16  6:53             ` Christian König
2025-05-16 14:53               ` Johannes Weiner
2025-05-16 15:35                 ` Christian König
2025-05-16 16:41                   ` Johannes Weiner
2025-05-16 17:42                     ` Christian König
2025-05-16 20:04                       ` Johannes Weiner
2025-05-16 20:25                         ` Dave Airlie
2025-05-18 16:28                           ` Christian König
2025-05-19  6:18                             ` Dave Airlie
2025-05-19  8:26                               ` Christian König
2025-05-22 19:51                           ` Tejun Heo
2025-05-23  7:58                             ` Christian König
2025-05-23 17:06                               ` Tejun Heo
2025-05-26  8:19                                 ` Christian König
2025-05-26 20:13                                   ` Dave Airlie
2025-05-27  8:01                                     ` Christian König
2025-05-16 16:12         ` Johannes Weiner
2025-05-21  2:23       ` Dave Airlie
2025-05-21  7:50         ` Christian König
2025-05-21 14:43         ` Johannes Weiner
2025-05-22  7:03           ` Dave Airlie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250502034046.1625896-4-airlied@gmail.com \
    --to=airlied@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hannes@cmpxchg.org \
    --cc=longman@redhat.com \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=simona@ffwll.ch \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).