* + mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch added to mm-new branch
@ 2025-10-08 0:33 Andrew Morton
From: Andrew Morton @ 2025-10-08 0:33 UTC (permalink / raw)
To: mm-commits, ziy, zhengqi.arch, shakeel.butt, roman.gushchin,
muchun.song, harry.yoo, hannes, david, songmuchun, akpm
The patch titled
Subject: mm: thp: introduce folio_split_queue_lock and its variants
has been added to the -mm mm-new branch. Its filename is
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and to post updated versions that replace or incrementally
fix up patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Muchun Song <songmuchun@bytedance.com>
Subject: mm: thp: introduce folio_split_queue_lock and its variants
Date: Sat, 4 Oct 2025 00:53:16 +0800
When a memcg is removed in the future, the binding between a folio and
its memcg may change, so a split queue lock obtained through the folio's
memcg may no longer be the right lock while it is held.  A new approach
is needed so that the split queue can be reparented to the parent memcg.
This patch begins that work by introducing a unified way to acquire the
split queue lock.
It is a code-only refactoring with no functional changes.
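The net effect on callers is that the open-coded queue lookup plus
spin_lock()/spin_unlock() on ->split_queue_lock is replaced by helpers
that resolve the queue and take the lock in one step.  Below is a minimal
sketch of the resulting pattern; the function is a hypothetical
illustration assembled from the hunks in this patch, not code the patch
adds.  Shrinker callbacks, which have no folio at hand, use
split_queue_lock_irqsave(sc->nid, sc->memcg, &flags) instead.

    /* Hypothetical caller, for illustration only. */
    static void example_unqueue(struct folio *folio)
    {
        struct deferred_split *ds_queue;
        unsigned long flags;

        /* Resolves the correct queue (per-node or per-memcg) and locks it. */
        ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
        if (!list_empty(&folio->_deferred_list)) {
            ds_queue->split_queue_len--;
            list_del_init(&folio->_deferred_list);
        }
        split_queue_unlock_irqrestore(ds_queue, flags);
    }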
Link: https://lkml.kernel.org/r/68492101fa51d4bab4ade327354b986c0959250e.1759510072.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memcontrol.h | 10 ++
mm/huge_memory.c | 119 ++++++++++++++++++++++++-----------
2 files changed, 94 insertions(+), 35 deletions(-)
--- a/include/linux/memcontrol.h~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/include/linux/memcontrol.h
@@ -1674,6 +1674,11 @@ int alloc_shrinker_info(struct mem_cgrou
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return shrinker->id;
+}
#else
#define mem_cgroup_sockets_enabled 0
@@ -1705,6 +1710,11 @@ static inline void set_shrinker_bit(stru
int nid, int shrinker_id)
{
}
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return -1;
+}
#endif
#ifdef CONFIG_MEMCG
--- a/mm/huge_memory.c~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/mm/huge_memory.c
@@ -1076,28 +1076,86 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struc
return pmd;
}
+static struct deferred_split *split_queue_node(int nid)
+{
+ struct pglist_data *pgdata = NODE_DATA(nid);
+
+ return &pgdata->deferred_split_queue;
+}
+
#ifdef CONFIG_MEMCG
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct mem_cgroup *memcg = folio_memcg(folio);
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ if (mem_cgroup_disabled())
+ return NULL;
+ if (split_queue_node(folio_nid(folio)) == queue)
+ return NULL;
+ return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
- if (memcg)
- return &memcg->deferred_split_queue;
- else
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return memcg ? &memcg->deferred_split_queue : split_queue_node(nid);
}
#else
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ return NULL;
+}
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return split_queue_node(nid);
}
#endif
+static struct deferred_split *split_queue_lock(int nid, struct mem_cgroup *memcg)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock(&queue->split_queue_lock);
+
+ return queue;
+}
+
+static struct deferred_split *
+split_queue_lock_irqsave(int nid, struct mem_cgroup *memcg, unsigned long *flags)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+ return queue;
+}
+
+static struct deferred_split *folio_split_queue_lock(struct folio *folio)
+{
+ return split_queue_lock(folio_nid(folio), folio_memcg(folio));
+}
+
+static struct deferred_split *
+folio_split_queue_lock_irqsave(struct folio *folio, unsigned long *flags)
+{
+ return split_queue_lock_irqsave(folio_nid(folio), folio_memcg(folio), flags);
+}
+
+static inline void split_queue_unlock(struct deferred_split *queue)
+{
+ spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void split_queue_unlock_irqrestore(struct deferred_split *queue,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
static inline bool is_transparent_hugepage(const struct folio *folio)
{
if (!folio_test_large(folio))
@@ -3579,7 +3637,7 @@ static int __folio_split(struct folio *f
struct page *split_at, struct page *lock_at,
struct list_head *list, bool uniform_split)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
+ struct deferred_split *ds_queue;
XA_STATE(xas, &folio->mapping->i_pages, folio->index);
struct folio *end_folio = folio_next(folio);
bool is_anon = folio_test_anon(folio);
@@ -3718,7 +3776,7 @@ static int __folio_split(struct folio *f
}
/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&ds_queue->split_queue_lock);
+ ds_queue = folio_split_queue_lock(folio);
if (folio_ref_freeze(folio, 1 + extra_pins)) {
struct swap_cluster_info *ci = NULL;
struct lruvec *lruvec;
@@ -3740,7 +3798,7 @@ static int __folio_split(struct folio *f
*/
list_del_init(&folio->_deferred_list);
}
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
if (mapping) {
int nr = folio_nr_pages(folio);
@@ -3835,7 +3893,7 @@ static int __folio_split(struct folio *f
if (ci)
swap_cluster_unlock(ci);
} else {
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
ret = -EAGAIN;
}
fail:
@@ -4016,8 +4074,7 @@ bool __folio_unqueue_deferred_split(stru
WARN_ON_ONCE(folio_ref_count(folio));
WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg_charged(folio));
- ds_queue = get_deferred_split_queue(folio);
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio)) {
@@ -4028,7 +4085,7 @@ bool __folio_unqueue_deferred_split(stru
list_del_init(&folio->_deferred_list);
unqueued = true;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
return unqueued; /* useful for debug warnings */
}
@@ -4036,10 +4093,7 @@ bool __folio_unqueue_deferred_split(stru
/* partially_mapped=false won't clear PG_partially_mapped folio flag */
void deferred_split_folio(struct folio *folio, bool partially_mapped)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = folio_memcg(folio);
-#endif
+ struct deferred_split *ds_queue;
unsigned long flags;
/*
@@ -4062,7 +4116,7 @@ void deferred_split_folio(struct folio *
if (folio_test_swapcache(folio))
return;
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (partially_mapped) {
if (!folio_test_partially_mapped(folio)) {
folio_set_partially_mapped(folio);
@@ -4077,15 +4131,16 @@ void deferred_split_folio(struct folio *
VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
}
if (list_empty(&folio->_deferred_list)) {
+ struct mem_cgroup *memcg;
+
+ memcg = folio_split_queue_memcg(folio, ds_queue);
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++;
-#ifdef CONFIG_MEMCG
if (memcg)
set_shrinker_bit(memcg, folio_nid(folio),
- deferred_split_shrinker->id);
-#endif
+ shrinker_id(deferred_split_shrinker));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
}
static unsigned long deferred_split_count(struct shrinker *shrink,
@@ -4128,19 +4183,13 @@ static bool thp_underused(struct folio *
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct pglist_data *pgdata = NODE_DATA(sc->nid);
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue;
unsigned long flags;
LIST_HEAD(list);
struct folio *folio, *next, *prev = NULL;
int split = 0, removed = 0;
-#ifdef CONFIG_MEMCG
- if (sc->memcg)
- ds_queue = &sc->memcg->deferred_split_queue;
-#endif
-
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = split_queue_lock_irqsave(sc->nid, sc->memcg, &flags);
/* Take pin on all head pages to avoid freeing them under us */
list_for_each_entry_safe(folio, next, &ds_queue->split_queue,
_deferred_list) {
@@ -4159,7 +4208,7 @@ static unsigned long deferred_split_scan
if (!--sc->nr_to_scan)
break;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
list_for_each_entry_safe(folio, next, &list, _deferred_list) {
bool did_split = false;
_
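A note on the memcg handling above: because the queue returned by the new
helpers can be either the per-node queue or a memcg's queue,
folio_split_queue_memcg() maps the locked queue back to its owning memcg
(returning NULL for the per-node case), and shrinker_id() evaluates to
shrinker->id with CONFIG_MEMCG and to -1 without it.  Together these let
the #ifdef CONFIG_MEMCG block drop out of deferred_split_folio().  A
condensed, illustrative version of that path, assembled from the hunks
above with a hypothetical wrapper function (not code the patch adds):

    static void example_queue_for_split(struct folio *folio)
    {
        struct deferred_split *ds_queue;
        unsigned long flags;

        ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
        if (list_empty(&folio->_deferred_list)) {
            /* NULL means ds_queue is the per-node queue. */
            struct mem_cgroup *memcg = folio_split_queue_memcg(folio, ds_queue);

            list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
            ds_queue->split_queue_len++;
            if (memcg)
                set_shrinker_bit(memcg, folio_nid(folio),
                                 shrinker_id(deferred_split_shrinker));
        }
        split_queue_unlock_irqrestore(ds_queue, flags);
    }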
Patches currently in -mm which might be from songmuchun@bytedance.com are
mm-thp-replace-folio_memcg-with-folio_memcg_charged.patch
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
mm-thp-use-folio_batch-to-handle-thp-splitting-in-deferred_split_scan.patch
* + mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch added to mm-new branch
@ 2025-10-15 22:01 Andrew Morton
From: Andrew Morton @ 2025-10-15 22:01 UTC (permalink / raw)
To: mm-commits, ziy, zhengqi.arch, shakeel.butt, ryan.roberts,
roman.gushchin, npache, muchun.song, mhocko, lorenzo.stoakes,
liam.howlett, lance.yang, hughd, harry.yoo, hannes, dev.jain,
david, baolin.wang, baohua, songmuchun, akpm
The patch titled
Subject: mm: thp: introduce folio_split_queue_lock and its variants
has been added to the -mm mm-new branch. Its filename is
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and to post updated versions that replace or incrementally
fix up patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Muchun Song <songmuchun@bytedance.com>
Subject: mm: thp: introduce folio_split_queue_lock and its variants
Date: Wed, 15 Oct 2025 14:35:31 +0800
When a memcg is removed in the future, the binding between a folio and
its memcg may change, so a split queue lock obtained through the folio's
memcg may no longer be the right lock while it is held.  A new approach
is needed so that the split queue can be reparented to the parent memcg.
This patch begins that work by introducing a unified way to acquire the
split queue lock.
It is a code-only refactoring with no functional changes.
Link: https://lkml.kernel.org/r/77069514656ea81a82969369f24da25ea1304e9c.1760509767.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memcontrol.h | 10 ++
mm/huge_memory.c | 119 ++++++++++++++++++++++++-----------
2 files changed, 94 insertions(+), 35 deletions(-)
--- a/include/linux/memcontrol.h~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/include/linux/memcontrol.h
@@ -1674,6 +1674,11 @@ int alloc_shrinker_info(struct mem_cgrou
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return shrinker->id;
+}
#else
#define mem_cgroup_sockets_enabled 0
@@ -1705,6 +1710,11 @@ static inline void set_shrinker_bit(stru
int nid, int shrinker_id)
{
}
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return -1;
+}
#endif
#ifdef CONFIG_MEMCG
--- a/mm/huge_memory.c~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/mm/huge_memory.c
@@ -1076,28 +1076,86 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struc
return pmd;
}
+static struct deferred_split *split_queue_node(int nid)
+{
+ struct pglist_data *pgdata = NODE_DATA(nid);
+
+ return &pgdata->deferred_split_queue;
+}
+
#ifdef CONFIG_MEMCG
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct mem_cgroup *memcg = folio_memcg(folio);
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ if (mem_cgroup_disabled())
+ return NULL;
+ if (split_queue_node(folio_nid(folio)) == queue)
+ return NULL;
+ return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
- if (memcg)
- return &memcg->deferred_split_queue;
- else
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return memcg ? &memcg->deferred_split_queue : split_queue_node(nid);
}
#else
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ return NULL;
+}
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return split_queue_node(nid);
}
#endif
+static struct deferred_split *split_queue_lock(int nid, struct mem_cgroup *memcg)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock(&queue->split_queue_lock);
+
+ return queue;
+}
+
+static struct deferred_split *
+split_queue_lock_irqsave(int nid, struct mem_cgroup *memcg, unsigned long *flags)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+ return queue;
+}
+
+static struct deferred_split *folio_split_queue_lock(struct folio *folio)
+{
+ return split_queue_lock(folio_nid(folio), folio_memcg(folio));
+}
+
+static struct deferred_split *
+folio_split_queue_lock_irqsave(struct folio *folio, unsigned long *flags)
+{
+ return split_queue_lock_irqsave(folio_nid(folio), folio_memcg(folio), flags);
+}
+
+static inline void split_queue_unlock(struct deferred_split *queue)
+{
+ spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void split_queue_unlock_irqrestore(struct deferred_split *queue,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
static inline bool is_transparent_hugepage(const struct folio *folio)
{
if (!folio_test_large(folio))
@@ -3654,7 +3712,7 @@ static int __folio_split(struct folio *f
struct page *split_at, struct page *lock_at,
struct list_head *list, bool uniform_split, bool unmapped)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
+ struct deferred_split *ds_queue;
XA_STATE(xas, &folio->mapping->i_pages, folio->index);
struct folio *end_folio = folio_next(folio);
bool is_anon = folio_test_anon(folio);
@@ -3796,7 +3854,7 @@ static int __folio_split(struct folio *f
}
/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&ds_queue->split_queue_lock);
+ ds_queue = folio_split_queue_lock(folio);
if (folio_ref_freeze(folio, 1 + extra_pins)) {
struct swap_cluster_info *ci = NULL;
struct lruvec *lruvec;
@@ -3818,7 +3876,7 @@ static int __folio_split(struct folio *f
*/
list_del_init(&folio->_deferred_list);
}
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
if (mapping) {
int nr = folio_nr_pages(folio);
@@ -3918,7 +3976,7 @@ static int __folio_split(struct folio *f
if (ci)
swap_cluster_unlock(ci);
} else {
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
ret = -EAGAIN;
}
fail:
@@ -4104,8 +4162,7 @@ bool __folio_unqueue_deferred_split(stru
WARN_ON_ONCE(folio_ref_count(folio));
WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg_charged(folio));
- ds_queue = get_deferred_split_queue(folio);
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio)) {
@@ -4116,7 +4173,7 @@ bool __folio_unqueue_deferred_split(stru
list_del_init(&folio->_deferred_list);
unqueued = true;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
return unqueued; /* useful for debug warnings */
}
@@ -4124,10 +4181,7 @@ bool __folio_unqueue_deferred_split(stru
/* partially_mapped=false won't clear PG_partially_mapped folio flag */
void deferred_split_folio(struct folio *folio, bool partially_mapped)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = folio_memcg(folio);
-#endif
+ struct deferred_split *ds_queue;
unsigned long flags;
/*
@@ -4150,7 +4204,7 @@ void deferred_split_folio(struct folio *
if (folio_test_swapcache(folio))
return;
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (partially_mapped) {
if (!folio_test_partially_mapped(folio)) {
folio_set_partially_mapped(folio);
@@ -4165,15 +4219,16 @@ void deferred_split_folio(struct folio *
VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
}
if (list_empty(&folio->_deferred_list)) {
+ struct mem_cgroup *memcg;
+
+ memcg = folio_split_queue_memcg(folio, ds_queue);
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++;
-#ifdef CONFIG_MEMCG
if (memcg)
set_shrinker_bit(memcg, folio_nid(folio),
- deferred_split_shrinker->id);
-#endif
+ shrinker_id(deferred_split_shrinker));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
}
static unsigned long deferred_split_count(struct shrinker *shrink,
@@ -4219,19 +4274,13 @@ static bool thp_underused(struct folio *
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct pglist_data *pgdata = NODE_DATA(sc->nid);
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue;
unsigned long flags;
LIST_HEAD(list);
struct folio *folio, *next, *prev = NULL;
int split = 0, removed = 0;
-#ifdef CONFIG_MEMCG
- if (sc->memcg)
- ds_queue = &sc->memcg->deferred_split_queue;
-#endif
-
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = split_queue_lock_irqsave(sc->nid, sc->memcg, &flags);
/* Take pin on all head pages to avoid freeing them under us */
list_for_each_entry_safe(folio, next, &ds_queue->split_queue,
_deferred_list) {
@@ -4250,7 +4299,7 @@ static unsigned long deferred_split_scan
if (!--sc->nr_to_scan)
break;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
list_for_each_entry_safe(folio, next, &list, _deferred_list) {
bool did_split = false;
_
Patches currently in -mm which might be from songmuchun@bytedance.com are
mm-thp-replace-folio_memcg-with-folio_memcg_charged.patch
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
mm-thp-use-folio_batch-to-handle-thp-splitting-in-deferred_split_scan.patch
* + mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch added to mm-new branch
@ 2025-11-10 20:04 Andrew Morton
From: Andrew Morton @ 2025-11-10 20:04 UTC (permalink / raw)
To: mm-commits, ziy, zhengqi.arch, shakeel.butt, ryan.roberts,
roman.gushchin, richard.weiyang, npache, muchun.song, mhocko,
lorenzo.stoakes, liam.howlett, lance.yang, hughd, harry.yoo,
hannes, dev.jain, david, baolin.wang, baohua, songmuchun, akpm
The patch titled
Subject: mm: thp: introduce folio_split_queue_lock and its variants
has been added to the -mm mm-new branch. Its filename is
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and to post updated versions that replace or incrementally
fix up patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Muchun Song <songmuchun@bytedance.com>
Subject: mm: thp: introduce folio_split_queue_lock and its variants
Date: Mon, 10 Nov 2025 16:17:56 +0800
When a memcg is removed in the future, the binding between a folio and
its memcg may change, so a split queue lock obtained through the folio's
memcg may no longer be the right lock while it is held.  A new approach
is needed so that the split queue can be reparented to the parent memcg.
This patch begins that work by introducing a unified way to acquire the
split queue lock.
It is a code-only refactoring with no functional changes.
Link: https://lkml.kernel.org/r/a31a90bcac04dc754f775e87ae3205be3170b571.1762762324.git.zhengqi.arch@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nico Pache <npache@redhat.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memcontrol.h | 10 ++
mm/huge_memory.c | 119 ++++++++++++++++++++++++-----------
2 files changed, 94 insertions(+), 35 deletions(-)
--- a/include/linux/memcontrol.h~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/include/linux/memcontrol.h
@@ -1647,6 +1647,11 @@ int alloc_shrinker_info(struct mem_cgrou
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return shrinker->id;
+}
#else
#define mem_cgroup_sockets_enabled 0
@@ -1678,6 +1683,11 @@ static inline void set_shrinker_bit(stru
int nid, int shrinker_id)
{
}
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return -1;
+}
#endif
#ifdef CONFIG_MEMCG
--- a/mm/huge_memory.c~mm-thp-introduce-folio_split_queue_lock-and-its-variants
+++ a/mm/huge_memory.c
@@ -1077,28 +1077,86 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struc
return pmd;
}
+static struct deferred_split *split_queue_node(int nid)
+{
+ struct pglist_data *pgdata = NODE_DATA(nid);
+
+ return &pgdata->deferred_split_queue;
+}
+
#ifdef CONFIG_MEMCG
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct mem_cgroup *memcg = folio_memcg(folio);
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ if (mem_cgroup_disabled())
+ return NULL;
+ if (split_queue_node(folio_nid(folio)) == queue)
+ return NULL;
+ return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
- if (memcg)
- return &memcg->deferred_split_queue;
- else
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return memcg ? &memcg->deferred_split_queue : split_queue_node(nid);
}
#else
static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
+struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+ return NULL;
+}
- return &pgdat->deferred_split_queue;
+static struct deferred_split *memcg_split_queue(int nid, struct mem_cgroup *memcg)
+{
+ return split_queue_node(nid);
}
#endif
+static struct deferred_split *split_queue_lock(int nid, struct mem_cgroup *memcg)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock(&queue->split_queue_lock);
+
+ return queue;
+}
+
+static struct deferred_split *
+split_queue_lock_irqsave(int nid, struct mem_cgroup *memcg, unsigned long *flags)
+{
+ struct deferred_split *queue;
+
+ queue = memcg_split_queue(nid, memcg);
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+ return queue;
+}
+
+static struct deferred_split *folio_split_queue_lock(struct folio *folio)
+{
+ return split_queue_lock(folio_nid(folio), folio_memcg(folio));
+}
+
+static struct deferred_split *
+folio_split_queue_lock_irqsave(struct folio *folio, unsigned long *flags)
+{
+ return split_queue_lock_irqsave(folio_nid(folio), folio_memcg(folio), flags);
+}
+
+static inline void split_queue_unlock(struct deferred_split *queue)
+{
+ spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void split_queue_unlock_irqrestore(struct deferred_split *queue,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
static inline bool is_transparent_hugepage(const struct folio *folio)
{
if (!folio_test_large(folio))
@@ -3690,7 +3748,7 @@ static int __folio_split(struct folio *f
struct page *split_at, struct page *lock_at,
struct list_head *list, enum split_type split_type, bool unmapped)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
+ struct deferred_split *ds_queue;
XA_STATE(xas, &folio->mapping->i_pages, folio->index);
struct folio *end_folio = folio_next(folio);
bool is_anon = folio_test_anon(folio);
@@ -3826,7 +3884,7 @@ static int __folio_split(struct folio *f
}
/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&ds_queue->split_queue_lock);
+ ds_queue = folio_split_queue_lock(folio);
if (folio_ref_freeze(folio, 1 + extra_pins)) {
struct swap_cluster_info *ci = NULL;
struct lruvec *lruvec;
@@ -3848,7 +3906,7 @@ static int __folio_split(struct folio *f
*/
list_del_init(&folio->_deferred_list);
}
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
if (mapping) {
int nr = folio_nr_pages(folio);
@@ -3948,7 +4006,7 @@ static int __folio_split(struct folio *f
if (ci)
swap_cluster_unlock(ci);
} else {
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
ret = -EAGAIN;
}
fail:
@@ -4131,8 +4189,7 @@ bool __folio_unqueue_deferred_split(stru
WARN_ON_ONCE(folio_ref_count(folio));
WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg_charged(folio));
- ds_queue = get_deferred_split_queue(folio);
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (!list_empty(&folio->_deferred_list)) {
ds_queue->split_queue_len--;
if (folio_test_partially_mapped(folio)) {
@@ -4143,7 +4200,7 @@ bool __folio_unqueue_deferred_split(stru
list_del_init(&folio->_deferred_list);
unqueued = true;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
return unqueued; /* useful for debug warnings */
}
@@ -4151,10 +4208,7 @@ bool __folio_unqueue_deferred_split(stru
/* partially_mapped=false won't clear PG_partially_mapped folio flag */
void deferred_split_folio(struct folio *folio, bool partially_mapped)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(folio);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = folio_memcg(folio);
-#endif
+ struct deferred_split *ds_queue;
unsigned long flags;
/*
@@ -4177,7 +4231,7 @@ void deferred_split_folio(struct folio *
if (folio_test_swapcache(folio))
return;
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (partially_mapped) {
if (!folio_test_partially_mapped(folio)) {
folio_set_partially_mapped(folio);
@@ -4192,15 +4246,16 @@ void deferred_split_folio(struct folio *
VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
}
if (list_empty(&folio->_deferred_list)) {
+ struct mem_cgroup *memcg;
+
+ memcg = folio_split_queue_memcg(folio, ds_queue);
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++;
-#ifdef CONFIG_MEMCG
if (memcg)
set_shrinker_bit(memcg, folio_nid(folio),
- deferred_split_shrinker->id);
-#endif
+ shrinker_id(deferred_split_shrinker));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
}
static unsigned long deferred_split_count(struct shrinker *shrink,
@@ -4246,19 +4301,13 @@ static bool thp_underused(struct folio *
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct pglist_data *pgdata = NODE_DATA(sc->nid);
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue;
unsigned long flags;
LIST_HEAD(list);
struct folio *folio, *next, *prev = NULL;
int split = 0, removed = 0;
-#ifdef CONFIG_MEMCG
- if (sc->memcg)
- ds_queue = &sc->memcg->deferred_split_queue;
-#endif
-
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = split_queue_lock_irqsave(sc->nid, sc->memcg, &flags);
/* Take pin on all head pages to avoid freeing them under us */
list_for_each_entry_safe(folio, next, &ds_queue->split_queue,
_deferred_list) {
@@ -4277,7 +4326,7 @@ static unsigned long deferred_split_scan
if (!--sc->nr_to_scan)
break;
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
list_for_each_entry_safe(folio, next, &list, _deferred_list) {
bool did_split = false;
_
Patches currently in -mm which might be from songmuchun@bytedance.com are
mm-thp-replace-folio_memcg-with-folio_memcg_charged.patch
mm-thp-introduce-folio_split_queue_lock-and-its-variants.patch
mm-thp-use-folio_batch-to-handle-thp-splitting-in-deferred_split_scan.patch