* [PATCH v3 1/6] mm: Wire up order in shrink_control
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 18:23 ` [PATCH v3 2/6] mm: Introduce zone_maybe_fragmented_in_shrinker() Matthew Brost
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel
Cc: Andrew Morton, Dave Chinner, Qi Zheng, Roman Gushchin,
Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
Johannes Weiner, Shakeel Butt, Kairui Song, Barry Song,
Axel Rasmussen, Yuanchu Xie, Wei Xu, linux-mm, linux-kernel,
Thomas Hellström
Pass the allocation order through shrink_control so shrinkers have
visibility into the order that triggered reclaim.
This allows shrinkers to implement better heuristics, such as detecting
high-order allocation pressure or fragmentation and avoiding eviction
of working sets when reclaim is invoked from kswapd.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: David Hildenbrand <david@kernel.org>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Kairui Song <kasong@tencent.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
include/linux/shrinker.h | 3 +++
mm/internal.h | 4 ++--
mm/shrinker.c | 11 +++++++----
mm/vmscan.c | 7 ++++---
4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..7072f693b9be 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -37,6 +37,9 @@ struct shrink_control {
/* current node being shrunk (for NUMA aware shrinkers) */
int nid;
+ /* Allocation order we are currently trying to fulfil. */
+ s8 order;
+
/*
* How many objects scan_objects should scan and try to reclaim.
* This is reset before every call, so it is safe for callees
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..ff8671dccf7b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1759,8 +1759,8 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
void __meminit __init_page_from_nid(unsigned long pfn, int nid);
/* shrinker related functions */
-unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
- int priority);
+unsigned long shrink_slab(gfp_t gfp_mask, int nid, s8 order,
+ struct mem_cgroup *memcg, int priority);
int shmem_add_to_page_cache(struct folio *folio,
struct address_space *mapping,
diff --git a/mm/shrinker.c b/mm/shrinker.c
index 76b3f750cf65..fb23a338fb22 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -466,7 +466,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
}
#ifdef CONFIG_MEMCG
-static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, s8 order,
struct mem_cgroup *memcg, int priority)
{
struct shrinker_info *info;
@@ -528,6 +528,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
+ .order = order,
.memcg = memcg,
};
struct shrinker *shrinker;
@@ -598,6 +599,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
* shrink_slab - shrink slab caches
* @gfp_mask: allocation context
* @nid: node whose slab caches to target
+ * @order: order of allocation
* @memcg: memory cgroup whose slab caches to target
* @priority: the reclaim priority
*
@@ -614,8 +616,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
*
* Returns the number of reclaimed slab objects.
*/
-unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
- int priority)
+unsigned long shrink_slab(gfp_t gfp_mask, int nid, s8 order,
+ struct mem_cgroup *memcg, int priority)
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
@@ -628,7 +630,7 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
* oom.
*/
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
- return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
+ return shrink_slab_memcg(gfp_mask, nid, order, memcg, priority);
/*
* lockless algorithm of global shrink.
@@ -656,6 +658,7 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
+ .order = order,
.memcg = memcg,
};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd1b1aa12581..a54d14ecad25 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -412,7 +412,7 @@ static unsigned long drop_slab_node(int nid)
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
+ freed += shrink_slab(GFP_KERNEL, nid, 0, memcg, 0);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
return freed;
@@ -5068,7 +5068,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
success = try_to_shrink_lruvec(lruvec, sc);
- shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
+ shrink_slab(sc->gfp_mask, pgdat->node_id, sc->order, memcg,
+ sc->priority);
if (!sc->proactive)
vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
@@ -6170,7 +6171,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
shrink_lruvec(lruvec, sc);
- shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+ shrink_slab(sc->gfp_mask, pgdat->node_id, sc->order, memcg,
sc->priority);
/* Record the group's reclaim efficiency */
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v3 2/6] mm: Introduce zone_maybe_fragmented_in_shrinker()
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
2026-04-30 18:23 ` [PATCH v3 1/6] mm: Wire up order in shrink_control Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 18:23 ` [PATCH v3 3/6] drm/ttm: Issue direct reclaim at beneficial_order Matthew Brost
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel
Cc: Thomas Hellström, Andrew Morton, David Hildenbrand,
Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, linux-mm, linux-kernel
Introduce zone_maybe_fragmented_in_shrinker() as a lightweight helper to
allow subsystems to make coarse decisions about reclaim behavior in the
presence of likely fragmentation.
The helper implements a simple heuristic: if the number of free pages
in a zone exceeds twice the high watermark, the zone is considered to
have ample free memory and allocation failures are more likely due to
fragmentation than overall memory pressure.
This is intentionally imprecise and is not meant to replace the core
MM compaction or fragmentation accounting logic. Instead, it provides
a cheap signal for callers (e.g., shrinkers) that wish to avoid
overly aggressive reclaim when sufficient free memory exists but
high-order allocations may still fail.
No functional changes; this is a preparatory helper for future users.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
v3: s/zone_appear_fragmented/zone_maybe_fragmented_in_shrinker (David
Hildenbrand)
---
include/linux/vmstat.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3c9c266cf782..1ad48f70c9d9 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -483,6 +483,18 @@ static inline const char *zone_stat_name(enum zone_stat_item item)
return vmstat_text[item];
}
+static inline bool zone_maybe_fragmented_in_shrinker(struct zone *zone)
+{
+ /*
+ * Simple heuristic: if the number of free pages is more than twice the
+ * high watermark, this may suggest that the zone is heavily fragmented.
+ * When called from a shrinker, aggressively evicting memory in this case
+ * may do more harm to overall system performance than good.
+ */
+ return zone_page_state(zone, NR_FREE_PAGES) >
+ high_wmark_pages(zone) * 2;
+}
+
#ifdef CONFIG_NUMA
static inline const char *numa_stat_name(enum numa_stat_item item)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v3 3/6] drm/ttm: Issue direct reclaim at beneficial_order
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
2026-04-30 18:23 ` [PATCH v3 1/6] mm: Wire up order in shrink_control Matthew Brost
2026-04-30 18:23 ` [PATCH v3 2/6] mm: Introduce zone_maybe_fragmented_in_shrinker() Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 18:23 ` [PATCH v3 4/6] drm/ttm: Introduce ttm_bo_shrink_kswap_maybe_fragmented() Matthew Brost
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel
Cc: Tvrtko Ursulin, Thomas Hellström, Carlos Santa,
Christian Koenig, Huang Rui, Matthew Auld, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Daniel Colascione, Andi Shyti
Triggering kswap at an order higher than beneficial_order makes little
sense, as the driver has already indicated the optimal order at which
reclaim is effective. Similarly, issuing direct reclaim or triggering
kswap at a lower order than beneficial_order is ineffective, since the
driver does not benefit from reclaiming lower-order pages.
As a result, direct reclaim should only be issued with __GFP_NORETRY at
exactly beneficial_order, or as a fallback, direct reclaim without
__GFP_NORETRY at order 0 when failure is not an option.
Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
CC: dri-devel@lists.freedesktop.org
Cc: Daniel Colascione <dancol@dancol.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
---
drivers/gpu/drm/ttm/ttm_pool.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 26a3689e5fd9..8425dbcc6c68 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -165,8 +165,8 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
* Do not add latency to the allocation path for allocations orders
* device tolds us do not bring them additional performance gains.
*/
- if (beneficial_order && order > beneficial_order)
- gfp_flags &= ~__GFP_DIRECT_RECLAIM;
+ if (order && beneficial_order && order != beneficial_order)
+ gfp_flags &= ~__GFP_RECLAIM;
if (!ttm_pool_uses_dma_alloc(pool)) {
p = alloc_pages_node(pool->nid, gfp_flags, order);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v3 4/6] drm/ttm: Introduce ttm_bo_shrink_kswap_maybe_fragmented()
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
` (2 preceding siblings ...)
2026-04-30 18:23 ` [PATCH v3 3/6] drm/ttm: Issue direct reclaim at beneficial_order Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 18:23 ` [PATCH v3 5/6] drm/xe: Set TTM device beneficial_order to 9 (2M) Matthew Brost
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel
Cc: Thomas Hellström, Carlos Santa, Christian Koenig, Huang Rui,
Matthew Auld, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Daniel Colascione
Introduce ttm_bo_shrink_kswap_maybe_fragmented() to allow TTM users to
distinguish background reclaim from kswapd that is likely driven by
high-order allocation failures under fragmentation.
The helper returns true when:
- order of shrinker invocation is non-zero,
- reclaim is running in kswapd, and
- the target node is valid, and
- one of the relevant zones reports free pages significantly above
its high watermark (via zone_maybe_fragmented_in_shrinker()).
This provides a coarse signal that overall free memory is available,
and that reclaim activity may be driven by fragmentation rather than
true memory pressure.
The intent is to allow drivers to adjust shrinker behavior in this
case, for example by preferring purgeable or low-value objects instead
of aggressively evicting active working sets in the background reclaim
path.
The heuristic is intentionally simple and conservative, and is not
intended to replace core MM fragmentation or compaction decisions.
No functional change; this is a preparatory helper for TTM users.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
CC: dri-devel@lists.freedesktop.org
Cc: Daniel Colascione <dancol@dancol.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
v3:
- s/ttm_bo_shrink_kswap_fragmented/ttm_bo_shrink_kswap_maybe_fragmented
(Andi)
- Wire in order (Thomas)
---
drivers/gpu/drm/ttm/ttm_bo_util.c | 38 +++++++++++++++++++++++++++++++
include/drm/ttm/ttm_bo.h | 2 ++
2 files changed, 40 insertions(+)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index f83b7d5ec6c6..a6a4255c10cc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -1169,3 +1169,41 @@ bool ttm_bo_shrink_avoid_wait(void)
return !current_is_kswapd();
}
EXPORT_SYMBOL(ttm_bo_shrink_avoid_wait);
+
+/**
+ * ttm_bo_shrink_kswap_maybe_fragmented() - Whether in kswap and memory might be
+ * fragmented
+ * @nid: current node being shrunk
+ * @order: order of shrinker invocation
+ *
+ * Return: true if in kswapd and memory appears fragmented, false if not.
+ */
+bool ttm_bo_shrink_kswap_maybe_fragmented(int nid, s8 order)
+{
+ enum zone_type zone_type;
+
+ if (!order)
+ return false;
+
+ if (!current_is_kswapd())
+ return false;
+
+ if (!numa_valid_node(nid))
+ return false;
+
+#if IS_ENABLED(CONFIG_ZONE_DMA32)
+ zone_type = ZONE_DMA32;
+#else
+ zone_type = ZONE_NORMAL;
+#endif
+
+ for (; zone_type <= ZONE_NORMAL; ++zone_type) {
+ struct zone *zone = &NODE_DATA(nid)->node_zones[zone_type];
+
+ if (zone_maybe_fragmented_in_shrinker(zone))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(ttm_bo_shrink_kswap_maybe_fragmented);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8310bc3d55f9..4d00f9aa90a1 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -262,6 +262,8 @@ bool ttm_bo_shrink_suitable(struct ttm_buffer_object *bo, struct ttm_operation_c
bool ttm_bo_shrink_avoid_wait(void);
+bool ttm_bo_shrink_kswap_maybe_fragmented(int nid, s8 order);
+
/**
* ttm_bo_reserve:
*
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v3 5/6] drm/xe: Set TTM device beneficial_order to 9 (2M)
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
` (3 preceding siblings ...)
2026-04-30 18:23 ` [PATCH v3 4/6] drm/ttm: Introduce ttm_bo_shrink_kswap_maybe_fragmented() Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 18:23 ` [PATCH v3 6/6] drm/xe: Avoid shrinker reclaim from kswapd under fragmentation Matthew Brost
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel
Cc: Thomas Hellström, Carlos Santa, Matthew Auld, Andi Shyti
Set the TTM device beneficial_order to 9 (2M), which is the sweet
spot for Xe when attempting reclaim on system memory BOs, as it matches
the large GPU page size. This ensures reclaim is attempted at the most
effective order for the driver.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 4b45b617a039..3f719ab08d1c 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -500,7 +500,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
xe->drm.anon_inode->i_mapping,
- xe->drm.vma_offset_manager, 0);
+ xe->drm.vma_offset_manager,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (WARN_ON(err))
return ERR_PTR(err);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v3 6/6] drm/xe: Avoid shrinker reclaim from kswapd under fragmentation
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
` (4 preceding siblings ...)
2026-04-30 18:23 ` [PATCH v3 5/6] drm/xe: Set TTM device beneficial_order to 9 (2M) Matthew Brost
@ 2026-04-30 18:23 ` Matthew Brost
2026-04-30 19:06 ` ✗ CI.checkpatch: warning for mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2) Patchwork
2026-04-30 19:06 ` ✗ CI.KUnit: failure " Patchwork
7 siblings, 0 replies; 9+ messages in thread
From: Matthew Brost @ 2026-04-30 18:23 UTC (permalink / raw)
To: intel-xe, dri-devel; +Cc: Thomas Hellström, Carlos Santa, Matthew Auld
When the Xe shrinker is invoked from kswapd, a large amount of free
memory in usable zones relative to the high watermark is a strong
signal that reclaim is being driven by fragmentation rather than true
memory pressure.
In this case, shrinking Xe memory is unlikely to help kswapd make
forward progress. Instead it can evict active GPU memory despite the
system still having substantial free memory, increasing residency churn
and reducing GPU forward progress.
Detect this case and bail out early from the Xe shrinker when running in
kswapd, the shrinker is invoked at a higher order, and any usable zone
has more than 2x its high watermark free.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
v3:
- Wire in order to heuristic (Thomas)
---
drivers/gpu/drm/xe/xe_shrinker.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 83374cd57660..792e0e216442 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -236,6 +236,9 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
if (nr_scanned >= nr_to_scan || !can_backup)
goto out;
+ if (ttm_bo_shrink_kswap_maybe_fragmented(sc->nid, sc->order))
+ goto out;
+
/* If we didn't wake before, try to do it now if needed. */
if (!runtime_pm)
runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* ✗ CI.checkpatch: warning for mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2)
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
` (5 preceding siblings ...)
2026-04-30 18:23 ` [PATCH v3 6/6] drm/xe: Avoid shrinker reclaim from kswapd under fragmentation Matthew Brost
@ 2026-04-30 19:06 ` Patchwork
2026-04-30 19:06 ` ✗ CI.KUnit: failure " Patchwork
7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2026-04-30 19:06 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe
== Series Details ==
Series: mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2)
URL : https://patchwork.freedesktop.org/series/165329/
State : warning
== Summary ==
+ KERNEL=/kernel
+ git clone https://gitlab.freedesktop.org/drm/maintainer-tools mt
Cloning into 'mt'...
warning: redirecting to https://gitlab.freedesktop.org/drm/maintainer-tools.git/
+ git -C mt rev-list -n1 origin/master
c8c12e558adaef7a4d125d83b6e1f8824bc13b82
+ cd /kernel
+ git config --global --add safe.directory /kernel
+ git log -n1
commit b7aea8d410b38ff13cdd77dcb31bef0f8b9b5f8b
Author: Matthew Brost <matthew.brost@intel.com>
Date: Thu Apr 30 11:23:35 2026 -0700
drm/xe: Avoid shrinker reclaim from kswapd under fragmentation
When the Xe shrinker is invoked from kswapd, a large amount of free
memory in usable zones relative to the high watermark is a strong
signal that reclaim is being driven by fragmentation rather than true
memory pressure.
In this case, shrinking Xe memory is unlikely to help kswapd make
forward progress. Instead it can evict active GPU memory despite the
system still having substantial free memory, increasing residency churn
and reducing GPU forward progress.
Detect this case and bail out early from the Xe shrinker when running in
kswapd, the shrinker is invoked at a higher order, and any usable zone
has more than 2x its high watermark free.
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+ /mt/dim checkpatch 6472756e8384e3945f12b6726dd9dc1ac12f3bb4 drm-intel
c8517f9b1fa3 mm: Wire up order in shrink_control
-:79: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#79: FILE: mm/shrinker.c:470:
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, s8 order,
struct mem_cgroup *memcg, int priority)
total: 0 errors, 0 warnings, 1 checks, 91 lines checked
887652c0b41c mm: Introduce zone_maybe_fragmented_in_shrinker()
4d1f3593c173 drm/ttm: Issue direct reclaim at beneficial_order
a64a875d276b drm/ttm: Introduce ttm_bo_shrink_kswap_maybe_fragmented()
892550c48b83 drm/xe: Set TTM device beneficial_order to 9 (2M)
b7aea8d410b3 drm/xe: Avoid shrinker reclaim from kswapd under fragmentation
^ permalink raw reply [flat|nested] 9+ messages in thread* ✗ CI.KUnit: failure for mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2)
2026-04-30 18:23 [PATCH v3 0/6] mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation Matthew Brost
` (6 preceding siblings ...)
2026-04-30 19:06 ` ✗ CI.checkpatch: warning for mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2) Patchwork
@ 2026-04-30 19:06 ` Patchwork
7 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2026-04-30 19:06 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe
== Series Details ==
Series: mm, drm/ttm, drm/xe: Avoid reclaim/eviction loops under fragmentation (rev2)
URL : https://patchwork.freedesktop.org/series/165329/
State : failure
== Summary ==
+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
ERROR:root:../mm/shrinker.c: In function ‘shrink_slab’:
../mm/shrinker.c:633:57: warning: passing argument 3 of ‘shrink_slab_memcg’ makes pointer from integer without a cast [-Wint-conversion]
633 | return shrink_slab_memcg(gfp_mask, nid, order, memcg, priority);
| ^~~~~
| |
| s8 {aka signed char}
../mm/shrinker.c:592:44: note: expected ‘struct mem_cgroup *’ but argument is of type ‘s8’ {aka ‘signed char’}
592 | struct mem_cgroup *memcg, int priority)
| ~~~~~~~~~~~~~~~~~~~^~~~~
../mm/shrinker.c:633:64: warning: passing argument 4 of ‘shrink_slab_memcg’ makes integer from pointer without a cast [-Wint-conversion]
633 | return shrink_slab_memcg(gfp_mask, nid, order, memcg, priority);
| ^~~~~
| |
| struct mem_cgroup *
../mm/shrinker.c:592:55: note: expected ‘int’ but argument is of type ‘struct mem_cgroup *’
592 | struct mem_cgroup *memcg, int priority)
| ~~~~^~~~~~~~
../mm/shrinker.c:633:24: error: too many arguments to function ‘shrink_slab_memcg’
633 | return shrink_slab_memcg(gfp_mask, nid, order, memcg, priority);
| ^~~~~~~~~~~~~~~~~
../mm/shrinker.c:591:22: note: declared here
591 | static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
| ^~~~~~~~~~~~~~~~~
make[4]: *** [../scripts/Makefile.build:289: mm/shrinker.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [../scripts/Makefile.build:548: mm] Error 2
make[3]: *** Waiting for unfinished jobs....
make[2]: *** [/kernel/Makefile:2141: .] Error 2
make[1]: *** [/kernel/Makefile:248: __sub-make] Error 2
make: *** [Makefile:248: __sub-make] Error 2
[19:06:10] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[19:06:15] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel
^ permalink raw reply [flat|nested] 9+ messages in thread