From: Luka Bai <lukafocus@icloud.com>
To: linux-mm@kvack.org
Cc: Jonathan Corbet <corbet@lwn.net>,
Shuah Khan <skhan@linuxfoundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@kernel.org>,
Lorenzo Stoakes <ljs@kernel.org>, Zi Yan <ziy@nvidia.com>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
"Liam R. Howlett" <liam@infradead.org>,
Nico Pache <npache@redhat.com>,
Ryan Roberts <ryan.roberts@arm.com>, Dev Jain <dev.jain@arm.com>,
Barry Song <baohua@kernel.org>,
Lance Yang <lance.yang@linux.dev>,
Vlastimil Babka <vbabka@kernel.org>,
Mike Rapoport <rppt@kernel.org>,
Suren Baghdasaryan <surenb@google.com>,
Michal Hocko <mhocko@suse.com>, Jann Horn <jannh@google.com>,
Arnd Bergmann <arnd@arndb.de>, Kairui Song <kasong@tencent.com>,
linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
linux-doc@vger.kernel.org, Luka Bai <lukabai@tencent.com>
Subject: [PATCH 4/5] mm: enable map_anon_folio_pmd_nopf to handle unshare
Date: Fri, 01 May 2026 13:55:45 +0800 [thread overview]
Message-ID: <20260501-thp_cow-v1-4-005377483738@tencent.com> (raw)
In-Reply-To: <20260501-thp_cow-v1-0-005377483738@tencent.com>
From: Luka Bai <lukabai@tencent.com>
The function map_anon_folio_pmd_nopf is able to map new anonymous pages.
As in do_huge_pmd_anonymous_page, it handles all the mapping and
statistics work correctly in one call. However, it does not support
FAULT_FLAG_UNSHARE.
Normally, FAULT_FLAG_UNSHARE is set when we just want to break multiple
non-exclusive sharers apart. It follows the copy-on-write process and
performs the same checks COW does — for example, whether we need to copy
memory or can keep using the existing page — doing essentially the same
work. But it is not triggered by a write to an RO pte/pmd that is
actually permitted to be written to; it happens purely for "unsharing".
Hence, when duplicating, we need to copy the permission and other marker
flags from the old page table entry into the new one, without making it
writable. Currently, map_anon_folio_pmd_nopf unconditionally tries to
make the new pmd writable, which is not what unsharing wants.
We add unsharing support to map_anon_folio_pmd_nopf by passing the
vm_fault struct as a parameter and reading the unsharing hint from it.
If we are in the unsharing procedure, we copy the soft_dirty and uffd_wp
flags into the new pmd instead of trying to make the new pmd writable.
Signed-off-by: Luka Bai <lukabai@tencent.com>
---
include/linux/huge_mm.h | 5 ++---
mm/huge_memory.c | 34 +++++++++++++++++++++++-----------
mm/khugepaged.c | 8 +++++++-
3 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3e5c6da3905b..61f0e614ca52 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -610,9 +610,8 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze);
bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmdp, struct folio *folio);
-void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd,
- struct vm_area_struct *vma, unsigned long haddr);
-
+void map_anon_folio_pmd_nopf(struct folio *folio, struct vm_fault *vmf,
+ bool cow);
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index babca060feca..1e661b411b2e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1423,13 +1423,26 @@ static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma,
return folio;
}
-void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd,
- struct vm_area_struct *vma, unsigned long haddr)
+void map_anon_folio_pmd_nopf(struct folio *folio, struct vm_fault *vmf,
+ bool cow)
{
pmd_t entry;
+ struct vm_area_struct *vma = vmf->vma;
+ pmd_t *pmd = vmf->pmd;
+ pmd_t orig_pmd = vmf->orig_pmd;
+ unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
entry = folio_mk_pmd(folio, vma->vm_page_prot);
- entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (unlikely(cow && unshare)) {
+ VM_WARN_ON(pmd_write(orig_pmd));
+ if (pmd_soft_dirty(orig_pmd))
+ entry = pmd_mksoft_dirty(entry);
+ if (pmd_uffd_wp(orig_pmd))
+ entry = pmd_mkuffd_wp(entry);
+ } else {
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ }
folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma);
set_pmd_at(vma->vm_mm, haddr, pmd, entry);
@@ -1437,19 +1450,18 @@ void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd,
deferred_split_folio(folio, false);
}
-static void map_anon_folio_pmd_pf(struct folio *folio, pmd_t *pmd,
- struct vm_area_struct *vma, unsigned long haddr)
+static void map_anon_folio_pmd_pf(struct folio *folio, struct vm_fault *vmf,
+ bool cow)
{
- map_anon_folio_pmd_nopf(folio, pmd, vma, haddr);
- add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+ map_anon_folio_pmd_nopf(folio, vmf, cow);
+ add_mm_counter(vmf->vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
count_vm_event(THP_FAULT_ALLOC);
count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
- count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
+ count_memcg_event_mm(vmf->vma->vm_mm, THP_FAULT_ALLOC);
}
static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
{
- unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
struct vm_area_struct *vma = vmf->vma;
struct folio *folio;
pgtable_t pgtable;
@@ -1483,7 +1495,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
return ret;
}
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
- map_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);
+ map_anon_folio_pmd_pf(folio, vmf, false);
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(vmf->ptl);
}
@@ -2174,7 +2186,7 @@ static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
if (ret)
goto release;
(void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
- map_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);
+ map_anon_folio_pmd_pf(folio, vmf, true);
goto unlock;
release:
folio_put(folio);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7d48d4fbd5f3..18d309b69d30 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1402,7 +1402,13 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long s
if (is_pmd_order(order)) { /* PMD collapse */
pgtable = pmd_pgtable(_pmd);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
- map_anon_folio_pmd_nopf(folio, pmd, vma, pmd_addr);
+ struct vm_fault vmf = {
+ .vma = vma,
+ .flags = 0,
+ .address = pmd_addr,
+ .orig_pmd = pmdp_get(pmd),
+ };
+ map_anon_folio_pmd_nopf(folio, &vmf, false);
} else { /* mTHP collapse */
map_anon_folio_pte_nopf(folio, pte, vma, start_addr, /*uffd_wp=*/ false);
smp_wmb(); /* make PTEs visible before PMD. See pmd_install() */
--
2.52.0
next prev parent reply other threads:[~2026-05-01 5:56 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 5:55 [PATCH 0/5] mm: Support selecting doing direct COW for anonymous pmd entry Luka Bai
2026-05-01 5:55 ` [PATCH 1/5] mm: add basic madvise helpers and branch for THP setup Luka Bai
2026-05-01 5:55 ` [PATCH 2/5] mm: add pmd level THP COW parameter in sysfs Luka Bai
2026-05-01 5:55 ` [PATCH 3/5] mm: add pmd level THP COW judgement helpers Luka Bai
2026-05-01 5:55 ` Luka Bai [this message]
2026-05-01 5:55 ` [PATCH 5/5] mm: support choosing to do THP COW for anonymous pmd entry Luka Bai
2026-05-01 7:11 ` David Hildenbrand (Arm)
2026-05-01 15:01 ` Luka Bai
2026-05-01 7:07 ` [PATCH 0/5] mm: Support selecting doing direct " David Hildenbrand (Arm)
2026-05-01 16:16 ` Luka Bai
2026-05-01 18:30 ` David Hildenbrand (Arm)
2026-05-02 5:06 ` Luka Bai
2026-05-03 7:03 ` [syzbot ci] " syzbot ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260501-thp_cow-v1-4-005377483738@tencent.com \
--to=lukafocus@icloud.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=corbet@lwn.net \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=jannh@google.com \
--cc=kasong@tencent.com \
--cc=lance.yang@linux.dev \
--cc=liam@infradead.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=lukabai@tencent.com \
--cc=mhocko@suse.com \
--cc=npache@redhat.com \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=skhan@linuxfoundation.org \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox