From: Nico Pache <npache@redhat.com>
To: linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-trace-kernel@vger.kernel.org
Cc: aarcange@redhat.com, akpm@linux-foundation.org,
anshuman.khandual@arm.com, apopple@nvidia.com, baohua@kernel.org,
baolin.wang@linux.alibaba.com, byungchul@sk.com,
catalin.marinas@arm.com, cl@gentwo.org, corbet@lwn.net,
dave.hansen@linux.intel.com, david@kernel.org, dev.jain@arm.com,
gourry@gourry.net, hannes@cmpxchg.org, hughd@google.com,
jack@suse.cz, jackmanb@google.com, jannh@google.com,
jglisse@google.com, joshua.hahnjy@gmail.com, kas@kernel.org,
lance.yang@linux.dev, Liam.Howlett@oracle.com,
lorenzo.stoakes@oracle.com, mathieu.desnoyers@efficios.com,
matthew.brost@intel.com, mhiramat@kernel.org, mhocko@suse.com,
npache@redhat.com, peterx@redhat.com, pfalcato@suse.de,
rakie.kim@sk.com, raquini@redhat.com, rdunlap@infradead.org,
richard.weiyang@gmail.com, rientjes@google.com,
rostedt@goodmis.org, rppt@kernel.org, ryan.roberts@arm.com,
shivankg@amd.com, sunnanyong@huawei.com, surenb@google.com,
thomas.hellstrom@linux.intel.com, tiwai@suse.de,
usamaarif642@gmail.com, vbabka@suse.cz, vishal.moola@gmail.com,
wangkefeng.wang@huawei.com, will@kernel.org, willy@infradead.org,
yang@os.amperecomputing.com, ying.huang@linux.alibaba.com,
ziy@nvidia.com, zokeefe@google.com
Subject: [PATCH mm-unstable v15 03/13] mm/khugepaged: generalize __collapse_huge_page_* for mTHP support
Date: Wed, 25 Feb 2026 20:23:47 -0700 [thread overview]
Message-ID: <20260226032347.232939-1-npache@redhat.com> (raw)
In-Reply-To: <20260226031741.230674-1-npache@redhat.com>
generalize the order of the __collapse_huge_page_* functions
to support future mTHP collapse.
mTHP collapse will not honor the khugepaged_max_ptes_shared or
khugepaged_max_ptes_swap parameters, and will fail if it encounters a
shared or swapped entry.
No functional changes in this patch.
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Co-developed-by: Dev Jain <dev.jain@arm.com>
Signed-off-by: Dev Jain <dev.jain@arm.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
mm/khugepaged.c | 73 +++++++++++++++++++++++++++++++------------------
1 file changed, 47 insertions(+), 26 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a9b645402b7f..ecdbbf6a01a6 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -535,7 +535,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long start_addr, pte_t *pte, struct collapse_control *cc,
- struct list_head *compound_pagelist)
+ unsigned int order, struct list_head *compound_pagelist)
{
struct page *page = NULL;
struct folio *folio = NULL;
@@ -543,15 +543,17 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
pte_t *_pte;
int none_or_zero = 0, shared = 0, referenced = 0;
enum scan_result result = SCAN_FAIL;
+ const unsigned long nr_pages = 1UL << order;
+ int max_ptes_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
- for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+ for (_pte = pte; _pte < pte + nr_pages;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
if (pte_none_or_zero(pteval)) {
++none_or_zero;
if (!userfaultfd_armed(vma) &&
(!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
+ none_or_zero <= max_ptes_none)) {
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
@@ -585,8 +587,14 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
/* See collapse_scan_pmd(). */
if (folio_maybe_mapped_shared(folio)) {
++shared;
- if (cc->is_khugepaged &&
- shared > khugepaged_max_ptes_shared) {
+ /*
+ * TODO: Support shared pages without leading to further
+ * mTHP collapses. Currently bringing in new pages via
+ * shared may cause a future higher order collapse on a
+ * rescan of the same range.
+ */
+ if (!is_pmd_order(order) || (cc->is_khugepaged &&
+ shared > khugepaged_max_ptes_shared)) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out;
@@ -679,18 +687,18 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
}
static void __collapse_huge_page_copy_succeeded(pte_t *pte,
- struct vm_area_struct *vma,
- unsigned long address,
- spinlock_t *ptl,
- struct list_head *compound_pagelist)
+ struct vm_area_struct *vma, unsigned long address,
+ spinlock_t *ptl, unsigned int order,
+ struct list_head *compound_pagelist)
{
- unsigned long end = address + HPAGE_PMD_SIZE;
+ unsigned long end = address + (PAGE_SIZE << order);
struct folio *src, *tmp;
pte_t pteval;
pte_t *_pte;
unsigned int nr_ptes;
+ const unsigned long nr_pages = 1UL << order;
- for (_pte = pte; _pte < pte + HPAGE_PMD_NR; _pte += nr_ptes,
+ for (_pte = pte; _pte < pte + nr_pages; _pte += nr_ptes,
address += nr_ptes * PAGE_SIZE) {
nr_ptes = 1;
pteval = ptep_get(_pte);
@@ -743,13 +751,11 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
}
static void __collapse_huge_page_copy_failed(pte_t *pte,
- pmd_t *pmd,
- pmd_t orig_pmd,
- struct vm_area_struct *vma,
- struct list_head *compound_pagelist)
+ pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
+ unsigned int order, struct list_head *compound_pagelist)
{
spinlock_t *pmd_ptl;
-
+ const unsigned long nr_pages = 1UL << order;
/*
* Re-establish the PMD to point to the original page table
* entry. Restoring PMD needs to be done prior to releasing
@@ -763,7 +769,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
* Release both raw and compound pages isolated
* in __collapse_huge_page_isolate.
*/
- release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
+ release_pte_pages(pte, pte + nr_pages, compound_pagelist);
}
/*
@@ -783,16 +789,16 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
*/
static enum scan_result __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
- unsigned long address, spinlock_t *ptl,
+ unsigned long address, spinlock_t *ptl, unsigned int order,
struct list_head *compound_pagelist)
{
unsigned int i;
enum scan_result result = SCAN_SUCCEED;
-
+ const unsigned long nr_pages = 1UL << order;
/*
* Copying pages' contents is subject to memory poison at any iteration.
*/
- for (i = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 0; i < nr_pages; i++) {
pte_t pteval = ptep_get(pte + i);
struct page *page = folio_page(folio, i);
unsigned long src_addr = address + i * PAGE_SIZE;
@@ -811,10 +817,10 @@ static enum scan_result __collapse_huge_page_copy(pte_t *pte, struct folio *foli
if (likely(result == SCAN_SUCCEED))
__collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
- compound_pagelist);
+ order, compound_pagelist);
else
__collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
- compound_pagelist);
+ order, compound_pagelist);
return result;
}
@@ -985,12 +991,12 @@ static enum scan_result check_pmd_still_valid(struct mm_struct *mm,
* Returns result: if not SCAN_SUCCEED, mmap_lock has been released.
*/
static enum scan_result __collapse_huge_page_swapin(struct mm_struct *mm,
- struct vm_area_struct *vma, unsigned long start_addr, pmd_t *pmd,
- int referenced)
+ struct vm_area_struct *vma, unsigned long start_addr,
+ pmd_t *pmd, int referenced, unsigned int order)
{
int swapped_in = 0;
vm_fault_t ret = 0;
- unsigned long addr, end = start_addr + (HPAGE_PMD_NR * PAGE_SIZE);
+ unsigned long addr, end = start_addr + (PAGE_SIZE << order);
enum scan_result result;
pte_t *pte = NULL;
spinlock_t *ptl;
@@ -1022,6 +1028,19 @@ static enum scan_result __collapse_huge_page_swapin(struct mm_struct *mm,
pte_present(vmf.orig_pte))
continue;
+ /*
+ * TODO: Support swapin without leading to further mTHP
+ * collapses. Currently bringing in new pages via swapin may
+ * cause a future higher order collapse on a rescan of the same
+ * range.
+ */
+ if (!is_pmd_order(order)) {
+ pte_unmap(pte);
+ mmap_read_unlock(mm);
+ result = SCAN_EXCEED_SWAP_PTE;
+ goto out;
+ }
+
vmf.pte = pte;
vmf.ptl = ptl;
ret = do_swap_page(&vmf);
@@ -1141,7 +1160,7 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
* that case. Continuing to collapse causes inconsistency.
*/
result = __collapse_huge_page_swapin(mm, vma, address, pmd,
- referenced);
+ referenced, HPAGE_PMD_ORDER);
if (result != SCAN_SUCCEED)
goto out_nolock;
}
@@ -1189,6 +1208,7 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
if (pte) {
result = __collapse_huge_page_isolate(vma, address, pte, cc,
+ HPAGE_PMD_ORDER,
&compound_pagelist);
spin_unlock(pte_ptl);
} else {
@@ -1219,6 +1239,7 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
vma, address, pte_ptl,
+ HPAGE_PMD_ORDER,
&compound_pagelist);
pte_unmap(pte);
if (unlikely(result != SCAN_SUCCEED))
--
2.53.0
next prev parent reply other threads:[~2026-02-26 3:24 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-26 3:17 [PATCH mm-unstable v15 00/13] khugepaged: mTHP support Nico Pache
2026-02-26 3:22 ` [PATCH mm-unstable v15 01/13] mm/khugepaged: generalize hugepage_vma_revalidate for " Nico Pache
2026-03-12 20:00 ` David Hildenbrand (Arm)
2026-02-26 3:23 ` [PATCH mm-unstable v15 02/13] mm/khugepaged: generalize alloc_charge_folio() Nico Pache
2026-03-12 20:05 ` David Hildenbrand (Arm)
2026-02-26 3:23 ` Nico Pache [this message]
2026-03-12 20:32 ` [PATCH mm-unstable v15 03/13] mm/khugepaged: generalize __collapse_huge_page_* for mTHP support David Hildenbrand (Arm)
2026-03-12 20:36 ` David Hildenbrand (Arm)
2026-03-12 20:56 ` David Hildenbrand (Arm)
2026-04-08 19:48 ` Nico Pache
2026-04-09 8:14 ` David Hildenbrand (Arm)
2026-04-09 16:17 ` Nico Pache
2026-04-09 18:35 ` David Hildenbrand (Arm)
2026-02-26 3:24 ` [PATCH mm-unstable v15 04/13] mm/khugepaged: introduce collapse_max_ptes_none helper function Nico Pache
2026-02-26 3:24 ` [PATCH mm-unstable v15 05/13] mm/khugepaged: generalize collapse_huge_page for mTHP collapse Nico Pache
2026-03-17 16:51 ` Lorenzo Stoakes (Oracle)
2026-03-17 17:16 ` Randy Dunlap
2026-04-16 4:14 ` Nico Pache
2026-04-16 6:43 ` Lorenzo Stoakes
2026-02-26 3:24 ` [PATCH mm-unstable v15 06/13] mm/khugepaged: skip collapsing mTHP to smaller orders Nico Pache
2026-03-12 21:00 ` David Hildenbrand (Arm)
2026-04-13 1:38 ` Nico Pache
2026-04-13 7:37 ` David Hildenbrand (Arm)
2026-02-26 3:25 ` [PATCH mm-unstable v15 07/13] mm/khugepaged: add per-order mTHP collapse failure statistics Nico Pache
2026-03-12 21:03 ` David Hildenbrand (Arm)
2026-03-17 17:05 ` Lorenzo Stoakes (Oracle)
2026-04-13 2:48 ` Nico Pache
2026-04-16 7:21 ` Lorenzo Stoakes
2026-02-26 3:25 ` [PATCH mm-unstable v15 08/13] mm/khugepaged: improve tracepoints for mTHP orders Nico Pache
2026-03-12 21:05 ` David Hildenbrand (Arm)
2026-02-26 3:25 ` [PATCH mm-unstable v15 09/13] mm/khugepaged: introduce collapse_allowable_orders helper function Nico Pache
2026-03-12 21:09 ` David Hildenbrand (Arm)
2026-03-17 17:08 ` Lorenzo Stoakes (Oracle)
2026-02-26 3:26 ` [PATCH mm-unstable v15 10/13] mm/khugepaged: Introduce mTHP collapse support Nico Pache
2026-03-12 21:16 ` David Hildenbrand (Arm)
2026-03-17 21:36 ` Lorenzo Stoakes (Oracle)
2026-02-26 3:26 ` [PATCH mm-unstable v15 11/13] mm/khugepaged: avoid unnecessary mTHP collapse attempts Nico Pache
2026-02-26 16:26 ` Usama Arif
2026-02-26 20:47 ` Nico Pache
2026-03-12 21:19 ` David Hildenbrand (Arm)
2026-03-17 10:35 ` Lorenzo Stoakes (Oracle)
2026-03-18 18:59 ` Nico Pache
2026-03-18 19:48 ` David Hildenbrand (Arm)
2026-03-19 15:59 ` Lorenzo Stoakes (Oracle)
2026-02-26 3:26 ` [PATCH mm-unstable v15 12/13] mm/khugepaged: run khugepaged for all orders Nico Pache
2026-02-26 15:53 ` Usama Arif
2026-03-12 21:22 ` David Hildenbrand (Arm)
2026-03-17 10:58 ` Lorenzo Stoakes (Oracle)
2026-03-18 19:02 ` Nico Pache
2026-03-17 11:36 ` Lance Yang
2026-03-18 19:07 ` Nico Pache
2026-02-26 3:27 ` [PATCH mm-unstable v15 13/13] Documentation: mm: update the admin guide for mTHP collapse Nico Pache
2026-03-17 11:02 ` Lorenzo Stoakes (Oracle)
2026-03-18 19:08 ` Nico Pache
2026-03-18 19:49 ` David Hildenbrand (Arm)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260226032347.232939-1-npache@redhat.com \
--to=npache@redhat.com \
--cc=Liam.Howlett@oracle.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=anshuman.khandual@arm.com \
--cc=apopple@nvidia.com \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=byungchul@sk.com \
--cc=catalin.marinas@arm.com \
--cc=cl@gentwo.org \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=jackmanb@google.com \
--cc=jannh@google.com \
--cc=jglisse@google.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kas@kernel.org \
--cc=lance.yang@linux.dev \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=matthew.brost@intel.com \
--cc=mhiramat@kernel.org \
--cc=mhocko@suse.com \
--cc=peterx@redhat.com \
--cc=pfalcato@suse.de \
--cc=rakie.kim@sk.com \
--cc=raquini@redhat.com \
--cc=rdunlap@infradead.org \
--cc=richard.weiyang@gmail.com \
--cc=rientjes@google.com \
--cc=rostedt@goodmis.org \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=shivankg@amd.com \
--cc=sunnanyong@huawei.com \
--cc=surenb@google.com \
--cc=thomas.hellstrom@linux.intel.com \
--cc=tiwai@suse.de \
--cc=usamaarif642@gmail.com \
--cc=vbabka@suse.cz \
--cc=vishal.moola@gmail.com \
--cc=wangkefeng.wang@huawei.com \
--cc=will@kernel.org \
--cc=willy@infradead.org \
--cc=yang@os.amperecomputing.com \
--cc=ying.huang@linux.alibaba.com \
--cc=ziy@nvidia.com \
--cc=zokeefe@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.