From: Dave Airlie <airlied@gmail.com>
To: dri-devel@lists.freedesktop.org, tj@kernel.org,
christian.koenig@amd.com, Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>
Cc: cgroups@vger.kernel.org, Dave Chinner <david@fromorbit.com>,
Waiman Long <longman@redhat.com>,
simona@ffwll.ch
Subject: [PATCH 06/16] ttm/pool: track allocated_pages per numa node.
Date: Thu, 16 Oct 2025 12:31:34 +1000 [thread overview]
Message-ID: <20251016023205.2303108-7-airlied@gmail.com> (raw)
In-Reply-To: <20251016023205.2303108-1-airlied@gmail.com>
From: Dave Airlie <airlied@redhat.com>
This gets the memory sizes from the nodes and stores the limit
as 50% of those. I think eventually we should drop the limits
once we have memcg-aware shrinking, but this should be more NUMA
friendly, and I think it is what people would prefer to
happen on NUMA-aware systems.
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
drivers/gpu/drm/ttm/ttm_pool.c | 60 +++++++++++++++++++++++++---------
1 file changed, 45 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index ae54f01f240b..a6b055256150 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -115,10 +115,11 @@ struct ttm_pool_tt_restore {
static unsigned long page_pool_size;
-MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
+MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool per NUMA node");
module_param(page_pool_size, ulong, 0644);
-static atomic_long_t allocated_pages;
+static unsigned long pool_node_limit[MAX_NUMNODES];
+static atomic_long_t allocated_pages[MAX_NUMNODES];
static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];
@@ -289,6 +290,7 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
{
unsigned int i, num_pages = 1 << pt->order;
+ int nid = page_to_nid(p);
for (i = 0; i < num_pages; ++i) {
if (PageHighMem(p))
@@ -299,10 +301,10 @@ static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
INIT_LIST_HEAD(&p->lru);
rcu_read_lock();
- list_lru_add(&pt->pages, &p->lru, page_to_nid(p), NULL);
+ list_lru_add(&pt->pages, &p->lru, nid, NULL);
rcu_read_unlock();
- atomic_long_add(1 << pt->order, &allocated_pages);
+ atomic_long_add(num_pages, &allocated_pages[nid]);
mod_lruvec_page_state(p, NR_GPU_ACTIVE, -num_pages);
mod_lruvec_page_state(p, NR_GPU_RECLAIM, num_pages);
}
@@ -328,7 +330,7 @@ static struct page *ttm_pool_type_take(struct ttm_pool_type *pt, int nid)
ret = list_lru_walk_node(&pt->pages, nid, take_one_from_lru, (void *)&p, &nr_to_walk);
if (ret == 1 && p) {
- atomic_long_sub(1 << pt->order, &allocated_pages);
+ atomic_long_sub(1 << pt->order, &allocated_pages[nid]);
mod_lruvec_page_state(p, NR_GPU_ACTIVE, (1 << pt->order));
mod_lruvec_page_state(p, NR_GPU_RECLAIM, -(1 << pt->order));
}
@@ -367,7 +369,7 @@ static void ttm_pool_dispose_list(struct ttm_pool_type *pt,
struct page *p;
p = list_first_entry(dispose, struct page, lru);
list_del_init(&p->lru);
- atomic_long_sub(1 << pt->order, &allocated_pages);
+ atomic_long_sub(1 << pt->order, &allocated_pages[page_to_nid(p)]);
ttm_pool_free_page(pt->pool, pt->caching, pt->order, p, true);
}
}
@@ -925,11 +927,13 @@ int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
*/
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
+ int nid = ttm_pool_nid(pool);
+
ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);
- while (atomic_long_read(&allocated_pages) > page_pool_size) {
- unsigned long diff = page_pool_size - atomic_long_read(&allocated_pages);
- ttm_pool_shrink(ttm_pool_nid(pool), diff);
+ while (atomic_long_read(&allocated_pages[nid]) > pool_node_limit[nid]) {
+ unsigned long diff = pool_node_limit[nid] - atomic_long_read(&allocated_pages[nid]);
+ ttm_pool_shrink(nid, diff);
}
}
EXPORT_SYMBOL(ttm_pool_free);
@@ -1189,7 +1193,7 @@ static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
do
num_freed += ttm_pool_shrink(sc->nid, sc->nr_to_scan);
while (num_freed < sc->nr_to_scan &&
- atomic_long_read(&allocated_pages));
+ atomic_long_read(&allocated_pages[sc->nid]));
sc->nr_scanned = num_freed;
@@ -1200,7 +1204,7 @@ static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- unsigned long num_pages = atomic_long_read(&allocated_pages);
+ unsigned long num_pages = atomic_long_read(&allocated_pages[sc->nid]);
return num_pages ? num_pages : SHRINK_EMPTY;
}
@@ -1237,8 +1241,12 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
/* Dump the total amount of allocated pages */
static void ttm_pool_debugfs_footer(struct seq_file *m)
{
- seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
- atomic_long_read(&allocated_pages), page_pool_size);
+ int nid;
+
+ for_each_node(nid) {
+ seq_printf(m, "\ntotal node%d\t: %8lu of %8lu\n", nid,
+ atomic_long_read(&allocated_pages[nid]), pool_node_limit[nid]);
+ }
}
/* Dump the information for the global pools */
@@ -1332,6 +1340,22 @@ DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
#endif
+static inline uint64_t ttm_get_node_memory_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
+
/**
* ttm_pool_mgr_init - Initialize globals
*
@@ -1343,8 +1367,14 @@ int ttm_pool_mgr_init(unsigned long num_pages)
{
unsigned int i;
- if (!page_pool_size)
- page_pool_size = num_pages;
+ int nid;
+ for_each_node(nid) {
+ if (!page_pool_size) {
+ uint64_t node_size = ttm_get_node_memory_size(nid);
+ pool_node_limit[nid] = (node_size >> PAGE_SHIFT) / 2;
+ } else
+ pool_node_limit[nid] = page_pool_size;
+ }
spin_lock_init(&shrinker_lock);
INIT_LIST_HEAD(&shrinker_list);
--
2.51.0
next prev parent reply other threads:[~2025-10-16 2:33 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-16 2:31 drm/ttm/memcg/lru: enable memcg tracking for ttm and amdgpu driver (complete series v4) Dave Airlie
2025-10-16 2:31 ` [PATCH 01/16] mm: add gpu active/reclaim per-node stat counters (v2) Dave Airlie
2025-10-16 7:48 ` Christian König
2025-10-16 2:31 ` [PATCH 02/16] drm/ttm: use gpu mm stats to track gpu memory allocations. (v4) Dave Airlie
2025-10-16 2:31 ` [PATCH 03/16] ttm/pool: port to list_lru. (v2) Dave Airlie
2025-10-16 2:31 ` [PATCH 04/16] ttm/pool: drop numa specific pools Dave Airlie
2025-10-16 2:31 ` [PATCH 05/16] ttm/pool: make pool shrinker NUMA aware Dave Airlie
2025-10-16 2:31 ` Dave Airlie [this message]
2025-10-16 2:31 ` [PATCH 07/16] memcg: add support for GPU page counters. (v3) Dave Airlie
2025-10-16 2:31 ` [PATCH 08/16] ttm: add a memcg accounting flag to the alloc/populate APIs Dave Airlie
2025-10-16 2:31 ` [PATCH 09/16] ttm/pool: initialise the shrinker earlier Dave Airlie
2025-10-16 2:31 ` [PATCH 10/16] ttm: add objcg pointer to bo and tt (v2) Dave Airlie
2025-10-16 2:31 ` [PATCH 11/16] ttm/pool: enable memcg tracking and shrinker. (v2) Dave Airlie
2025-10-16 2:31 ` [PATCH 12/16] ttm: hook up memcg placement flags Dave Airlie
2025-10-16 2:31 ` [PATCH 13/16] memcontrol: allow objcg api when memcg is config off Dave Airlie
2025-10-16 2:31 ` [PATCH 14/16] amdgpu: add support for memory cgroups Dave Airlie
2025-10-16 2:31 ` [PATCH 15/16] ttm: add support for a module option to disable memcg integration Dave Airlie
2025-10-16 2:31 ` [PATCH 16/16] xe: create a flag to enable memcg accounting for XE as well Dave Airlie
-- strict thread matches above, loose matches on Subject: below --
2026-02-24 2:06 drm/ttm/memcg/lru: enable memcg tracking for ttm and amdgpu driver (complete series v5) Dave Airlie
2026-02-24 2:06 ` [PATCH 06/16] ttm/pool: track allocated_pages per numa node Dave Airlie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251016023205.2303108-7-airlied@gmail.com \
--to=airlied@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=christian.koenig@amd.com \
--cc=david@fromorbit.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=hannes@cmpxchg.org \
--cc=longman@redhat.com \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=simona@ffwll.ch \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.