* [PATCH v4 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 02/14] mm: rename uffd-wp PTE bit macros to uffd Kiryl Shutsemau (Meta)
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
pte_protnone() and pmd_protnone() detect present-but-inaccessible page
table entries. This capability is useful beyond NUMA balancing -- for
example, userfaultfd working set tracking uses protnone PTEs to track
page access without unmapping pages.
Introduce CONFIG_ARCH_HAS_PTE_PROTNONE to decouple the protnone PTE
infrastructure from CONFIG_NUMA_BALANCING. The six architectures that
support protnone PTEs (x86_64, arm64, powerpc, s390, riscv, loongarch)
now select this option, and CONFIG_NUMA_BALANCING depends on it.
No functional change -- the same set of architectures continues to have
working protnone support, but the infrastructure is now available
independently of NUMA balancing.
Signed-off-by: Kiryl Shutsemau (Meta) <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Acked-by: SeongJae Park <sj@kernel.org>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/pgtable.h | 7 ++---
arch/loongarch/Kconfig | 1 +
arch/loongarch/include/asm/pgtable.h | 4 +--
arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++---
arch/powerpc/platforms/Kconfig.cputype | 1 +
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable.h | 7 ++---
arch/s390/Kconfig | 1 +
arch/s390/include/asm/pgtable.h | 4 +--
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 8 ++---
include/linux/pgtable.h | 32 ++++++++++++++------
init/Kconfig | 8 +++++
mm/debug_vm_pgtable.c | 4 +--
15 files changed, 52 insertions(+), 36 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fe60738e5943..319470b3b1bb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -78,6 +78,7 @@ config ARM64
select ARCH_SUPPORTS_CFI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+ select ARCH_HAS_PTE_PROTNONE
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4dfa42b7d053..873f4ea2e288 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -553,10 +553,7 @@ static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * See the comment in include/linux/pgtable.h
- */
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pte_protnone(pte_t pte)
{
/*
@@ -575,7 +572,7 @@ static inline int pmd_protnone(pmd_t pmd)
{
return pte_protnone(pmd_pte(pmd));
}
-#endif
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 606597da46b8..77e9a9a30483 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -67,6 +67,7 @@ config LOONGARCH
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
+ select ARCH_HAS_PTE_PROTNONE
select ARCH_SUPPORTS_NUMA_BALANCING if NUMA
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_SUPPORTS_RT
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 2a0b63ae421f..d295447a2763 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -619,7 +619,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline long pte_protnone(pte_t pte)
{
return (pte_val(pte) & _PAGE_PROTNONE);
@@ -629,7 +629,7 @@ static inline long pmd_protnone(pmd_t pmd)
{
return (pmd_val(pmd) & _PAGE_PROTNONE);
}
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0)
#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e67e64ac6e8c..53a0c5892548 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -490,13 +490,13 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pte_protnone(pte_t pte)
{
return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
}
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
static inline bool pte_hw_valid(pte_t pte)
{
@@ -1067,12 +1067,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#endif
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
-#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pmd_protnone(pmd_t pmd)
{
return pte_protnone(pmd_pte(pmd));
}
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index bac02c83bb3e..36b64a24cf30 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -87,6 +87,7 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK
select ARCH_SUPPORTS_HUGETLBFS
+ select ARCH_HAS_PTE_PROTNONE
select ARCH_SUPPORTS_NUMA_BALANCING
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c5754942cf85..e2c5776d18cf 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -71,6 +71,7 @@ config RISCV
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
+ select ARCH_HAS_PTE_PROTNONE if MMU
select ARCH_SUPPORTS_RT
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_SCHED_MC if SMP
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a1a7c6520a09..48a127323b21 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -524,10 +524,7 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
PAGE_SIZE)
#endif
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * See the comment in include/asm-generic/pgtable.h
- */
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pte_protnone(pte_t pte)
{
return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
@@ -537,7 +534,7 @@ static inline int pmd_protnone(pmd_t pmd)
{
return pte_protnone(pmd_pte(pmd));
}
-#endif
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
/* Modify page protection bits */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ecbcbb781e40..bc5bef08454b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -151,6 +151,7 @@ config S390
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
+ select ARCH_HAS_PTE_PROTNONE
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2c6cee8241e0..97241dea5573 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -842,7 +842,7 @@ static inline int pte_same(pte_t a, pte_t b)
return pte_val(a) == pte_val(b);
}
-#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pte_protnone(pte_t pte)
{
return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
@@ -853,7 +853,7 @@ static inline int pmd_protnone(pmd_t pmd)
/* pmd_leaf(pmd) implies pmd_present(pmd) */
return pmd_leaf(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
}
-#endif
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
static inline bool pte_swp_exclusive(pte_t pte)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f3f7cb01d69d..9da1119e8ff6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -123,6 +123,7 @@ config X86
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64
+ select ARCH_HAS_PTE_PROTNONE if X86_64
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_CFI if X86_64
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2187e9cfcefa..c7f014cbf0a9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -985,11 +985,7 @@ static inline int pmd_present(pmd_t pmd)
return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/linux/pgtable.h
- */
+#ifdef CONFIG_ARCH_HAS_PTE_PROTNONE
static inline int pte_protnone(pte_t pte)
{
return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
@@ -1001,7 +997,7 @@ static inline int pmd_protnone(pmd_t pmd)
return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
== _PAGE_PROTNONE;
}
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
static inline int pmd_none(pmd_t pmd)
{
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index cdd68ed3ae1a..b6516a11adfa 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -2052,18 +2052,26 @@ static inline int pud_trans_unstable(pud_t *pud)
return 0;
}
-#ifndef CONFIG_NUMA_BALANCING
+#ifndef CONFIG_ARCH_HAS_PTE_PROTNONE
/*
- * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
- * perfectly valid to indicate "no" in that case, which is why our default
- * implementation defaults to "always no".
+ * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It
+ * is perfectly valid to indicate "no" in that case, which is why our
+ * default implementation defaults to "always no".
*
- * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
- * page protection due to NUMA hinting. NUMA hinting faults only apply in
- * accessible VMAs.
+ * In an accessible VMA, pte_protnone() reliably indicates a present PTE
+ * with PROT_NONE protection. Today the kernel uses such PTEs for two
+ * purposes: NUMA hinting faults, and userfaultfd RWP tracking on
+ * VM_UFFD_RWP VMAs. The two are distinguished by the uffd PTE bit and
+ * the VMA flag; see include/linux/userfaultfd_k.h.
*
- * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
- * looking at the VMA accessibility is sufficient.
+ * So, to reliably identify PROT_NONE PTEs that require kernel handling,
+ * looking at the VMA accessibility (and the uffd bit on RWP VMAs) is
+ * sufficient.
+ *
+ * Architectures without CONFIG_ARCH_HAS_PTE_PROTNONE get the always-zero
+ * stubs below; PAGE_NONE references that the compiler cannot eliminate
+ * trip the BUILD_BUG() fallback at build time, since callers should have
+ * folded such paths to dead code via IS_ENABLED(CONFIG_ARCH_HAS_PTE_PROTNONE).
*/
static inline int pte_protnone(pte_t pte)
{
@@ -2074,7 +2082,11 @@ static inline int pmd_protnone(pmd_t pmd)
{
return 0;
}
-#endif /* CONFIG_NUMA_BALANCING */
+
+#ifndef PAGE_NONE
+#define PAGE_NONE ({ BUILD_BUG(); (pgprot_t){0}; })
+#endif
+#endif /* CONFIG_ARCH_HAS_PTE_PROTNONE */
#endif /* CONFIG_MMU */
diff --git a/init/Kconfig b/init/Kconfig
index 2937c4d308ae..58abb7f19206 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -944,6 +944,13 @@ config SCHED_PROXY_EXEC
endmenu
+#
+# For architectures that support present-but-inaccessible (PROT_NONE) page
+# table entries detectable via pte_protnone() / pmd_protnone():
+#
+config ARCH_HAS_PTE_PROTNONE
+ bool
+
#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:
@@ -1010,6 +1017,7 @@ config ARCH_WANT_NUMA_VARIABLE_LOCALITY
config NUMA_BALANCING
bool "Memory placement aware NUMA scheduler"
depends on ARCH_SUPPORTS_NUMA_BALANCING
+ depends on ARCH_HAS_PTE_PROTNONE
depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
depends on SMP && NUMA_MIGRATION && !PREEMPT_RT
help
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 23dc3ee09561..5e9f3a35f924 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -672,7 +672,7 @@ static void __init pte_protnone_tests(struct pgtable_debug_args *args)
{
pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
- if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_PROTNONE))
return;
pr_debug("Validating PTE protnone\n");
@@ -685,7 +685,7 @@ static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
{
pmd_t pmd;
- if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_PROTNONE))
return;
if (!has_transparent_hugepage())
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 02/14] mm: rename uffd-wp PTE bit macros to uffd
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 03/14] mm: rename uffd-wp PTE accessors " Kiryl Shutsemau (Meta)
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
The uffd-wp PTE bit is about to gain a second consumer: userfaultfd
RWP will use the same bit to mark access-tracking PTEs, distinct
from mprotect(PROT_NONE) or NUMA-hinting PTEs. WP vs RWP semantics
come from the VMA flag; the bit is just "uffd has claimed this
entry." Drop the "_wp" suffix from the arch-private bit macros so
they reflect that.
x86: _PAGE_BIT_UFFD_WP -> _PAGE_BIT_UFFD
_PAGE_UFFD_WP -> _PAGE_UFFD
_PAGE_SWP_UFFD_WP -> _PAGE_SWP_UFFD
arm64: PTE_UFFD_WP -> PTE_UFFD
PTE_SWP_UFFD_WP -> PTE_SWP_UFFD
riscv: _PAGE_UFFD_WP -> _PAGE_UFFD
_PAGE_SWP_UFFD_WP -> _PAGE_SWP_UFFD
Pure mechanical rename -- no behavior change.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
---
arch/arm64/include/asm/pgtable-prot.h | 8 ++++----
arch/arm64/include/asm/pgtable.h | 12 ++++++------
arch/riscv/include/asm/pgtable-bits.h | 12 ++++++------
arch/riscv/include/asm/pgtable.h | 14 +++++++-------
arch/x86/include/asm/pgtable.h | 24 ++++++++++++------------
arch/x86/include/asm/pgtable_types.h | 16 ++++++++--------
6 files changed, 43 insertions(+), 43 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 212ce1b02e15..09d7c00cf405 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -28,11 +28,11 @@
#define PTE_PRESENT_VALID_KERNEL (PTE_VALID | PTE_MAYBE_NG)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */
-#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */
+#define PTE_UFFD (_AT(pteval_t, 1) << 58) /* userfaultfd tracking */
+#define PTE_SWP_UFFD (_AT(pteval_t, 1) << 3) /* only for swp ptes */
#else
-#define PTE_UFFD_WP (_AT(pteval_t, 0))
-#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0))
+#define PTE_UFFD (_AT(pteval_t, 0))
+#define PTE_SWP_UFFD (_AT(pteval_t, 0))
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 873f4ea2e288..3eecb2c17711 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -343,17 +343,17 @@ static inline pmd_t pmd_mknoncont(pmd_t pmd)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
- return !!(pte_val(pte) & PTE_UFFD_WP);
+ return !!(pte_val(pte) & PTE_UFFD);
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
- return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
+ return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD)));
}
static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
+ return clear_pte_bit(pte, __pgprot(PTE_UFFD));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
@@ -539,17 +539,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+ return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD));
}
static inline int pte_swp_uffd_wp(pte_t pte)
{
- return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
+ return !!(pte_val(pte) & PTE_SWP_UFFD);
}
static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+ return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index b422d9691e60..d5a86b4df3ce 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -40,20 +40,20 @@
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-/* ext_svrsw60t59b: Bit(60) for uffd-wp tracking */
-#define _PAGE_UFFD_WP \
+/* ext_svrsw60t59b: Bit(60) for userfaultfd tracking */
+#define _PAGE_UFFD \
((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
(1UL << 60) : 0)
/*
* Bit 4 is not involved into swap entry computation, so we
- * can borrow it for swap page uffd-wp tracking.
+ * can borrow it for swap page userfaultfd tracking.
*/
-#define _PAGE_SWP_UFFD_WP \
+#define _PAGE_SWP_UFFD \
((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
_PAGE_USER : 0)
#else
-#define _PAGE_UFFD_WP 0
-#define _PAGE_SWP_UFFD_WP 0
+#define _PAGE_UFFD 0
+#define _PAGE_SWP_UFFD 0
#endif
#define _PAGE_TABLE _PAGE_PRESENT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 48a127323b21..ca69948b3ed8 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -405,32 +405,32 @@ static inline pte_t pte_wrprotect(pte_t pte)
static inline bool pte_uffd_wp(pte_t pte)
{
- return !!(pte_val(pte) & _PAGE_UFFD_WP);
+ return !!(pte_val(pte) & _PAGE_UFFD);
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
- return pte_wrprotect(__pte(pte_val(pte) | _PAGE_UFFD_WP));
+ return pte_wrprotect(__pte(pte_val(pte) | _PAGE_UFFD));
}
static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
- return __pte(pte_val(pte) & ~(_PAGE_UFFD_WP));
+ return __pte(pte_val(pte) & ~(_PAGE_UFFD));
}
static inline bool pte_swp_uffd_wp(pte_t pte)
{
- return !!(pte_val(pte) & _PAGE_SWP_UFFD_WP);
+ return !!(pte_val(pte) & _PAGE_SWP_UFFD);
}
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SWP_UFFD_WP);
+ return __pte(pte_val(pte) | _PAGE_SWP_UFFD);
}
static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
- return __pte(pte_val(pte) & ~(_PAGE_SWP_UFFD_WP));
+ return __pte(pte_val(pte) & ~(_PAGE_SWP_UFFD));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
@@ -1157,7 +1157,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
* bit 0: _PAGE_PRESENT (zero)
* bit 1 to 2: (zero)
* bit 3: _PAGE_SWP_SOFT_DIRTY
- * bit 4: _PAGE_SWP_UFFD_WP
+ * bit 4: _PAGE_SWP_UFFD
* bit 5: _PAGE_PROT_NONE (zero)
* bit 6: exclusive marker
* bits 7 to 11: swap type
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c7f014cbf0a9..038c806b50a2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -413,17 +413,17 @@ static inline pte_t pte_wrprotect(pte_t pte)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
- return pte_flags(pte) & _PAGE_UFFD_WP;
+ return pte_flags(pte) & _PAGE_UFFD;
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
- return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP));
+ return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD));
}
static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_UFFD_WP);
+ return pte_clear_flags(pte, _PAGE_UFFD);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
@@ -528,17 +528,17 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pmd_uffd_wp(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_UFFD_WP;
+ return pmd_flags(pmd) & _PAGE_UFFD;
}
static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
- return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP));
+ return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD));
}
static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
+ return pmd_clear_flags(pmd, _PAGE_UFFD);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
@@ -1550,32 +1550,32 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
- return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
+ return pte_set_flags(pte, _PAGE_SWP_UFFD);
}
static inline int pte_swp_uffd_wp(pte_t pte)
{
- return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
+ return pte_flags(pte) & _PAGE_SWP_UFFD;
}
static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
+ return pte_clear_flags(pte, _PAGE_SWP_UFFD);
}
static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
+ return pmd_set_flags(pmd, _PAGE_SWP_UFFD);
}
static inline int pmd_swp_uffd_wp(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
+ return pmd_flags(pmd) & _PAGE_SWP_UFFD;
}
static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
+ return pmd_clear_flags(pmd, _PAGE_SWP_UFFD);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 2ec250ba467e..af08d98be930 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -31,7 +31,7 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
-#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
+#define _PAGE_BIT_UFFD _PAGE_BIT_SOFTW2 /* userfaultfd tracking */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
@@ -39,7 +39,7 @@
#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
#else
-/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
+/* Shared with _PAGE_BIT_UFFD which is not supported on 32 bit */
#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
#endif
@@ -111,11 +111,11 @@
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
-#define _PAGE_SWP_UFFD_WP _PAGE_USER
+#define _PAGE_UFFD (_AT(pteval_t, 1) << _PAGE_BIT_UFFD)
+#define _PAGE_SWP_UFFD _PAGE_USER
#else
-#define _PAGE_UFFD_WP (_AT(pteval_t, 0))
-#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0))
+#define _PAGE_UFFD (_AT(pteval_t, 0))
+#define _PAGE_SWP_UFFD (_AT(pteval_t, 0))
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
@@ -129,7 +129,7 @@
/*
* The hardware requires shadow stack to be Write=0,Dirty=1. However,
* there are valid cases where the kernel might create read-only PTEs that
- * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In
+ * are dirty (e.g., fork(), mprotect(), userfaultfd, soft-dirty tracking). In
* this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit,
* to avoid creating a wrong "shadow stack" PTEs. Such PTEs have
* (Write=0,SavedDirty=1,Dirty=0) set.
@@ -151,7 +151,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_CC | _PAGE_UFFD_WP)
+ _PAGE_CC | _PAGE_UFFD)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 03/14] mm: rename uffd-wp PTE accessors to uffd
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 02/14] mm: rename uffd-wp PTE bit macros to uffd Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 04/14] mm: add VM_UFFD_RWP VMA flag Kiryl Shutsemau (Meta)
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Userfaultfd RWP will reuse the uffd-wp PTE bit to mark access-tracking
PTEs, alongside the write-protected ones it already marks. The bit's
meaning now depends on the VMA flag (WP or RWP), not on its name.
Rename the kernel-internal names that describe the bit:
- pte/pmd/huge_pte accessors (and swap variants)
- pgtable_supports_uffd() capability query
- SCAN_PTE_UFFD khugepaged enum
The ftrace string emitted by mm_khugepaged_scan_pmd for this enum is
kept as "pte_uffd_wp" so existing trace-based tooling keeps matching.
Pure mechanical rename -- no behavior change.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
---
arch/arm64/include/asm/pgtable.h | 28 ++++++++--------
arch/riscv/include/asm/pgtable.h | 38 +++++++++++-----------
arch/s390/include/asm/hugetlb.h | 12 +++----
arch/x86/include/asm/pgtable.h | 24 +++++++-------
fs/proc/task_mmu.c | 44 ++++++++++++-------------
include/asm-generic/hugetlb.h | 18 +++++------
include/asm-generic/pgtable_uffd.h | 32 +++++++++---------
include/linux/leafops.h | 4 +--
include/linux/mm_inline.h | 4 +--
include/linux/swapops.h | 4 +--
include/linux/userfaultfd_k.h | 14 ++++----
include/trace/events/huge_memory.h | 2 +-
mm/huge_memory.c | 52 +++++++++++++++---------------
mm/hugetlb.c | 46 +++++++++++++-------------
mm/internal.h | 4 +--
mm/khugepaged.c | 22 ++++++-------
mm/memory.c | 34 +++++++++----------
mm/migrate.c | 12 +++----
mm/migrate_device.c | 8 ++---
mm/mprotect.c | 12 +++----
mm/mremap.c | 4 +--
mm/page_table_check.c | 8 ++---
mm/rmap.c | 16 ++++-----
mm/swapfile.c | 4 +--
mm/userfaultfd.c | 6 ++--
25 files changed, 226 insertions(+), 226 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3eecb2c17711..c41e4d59dc9f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -341,17 +341,17 @@ static inline pmd_t pmd_mknoncont(pmd_t pmd)
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline int pte_uffd_wp(pte_t pte)
+static inline int pte_uffd(pte_t pte)
{
return !!(pte_val(pte) & PTE_UFFD);
}
-static inline pte_t pte_mkuffd_wp(pte_t pte)
+static inline pte_t pte_mkuffd(pte_t pte)
{
return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD)));
}
-static inline pte_t pte_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_clear_uffd(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_UFFD));
}
@@ -537,17 +537,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+static inline pte_t pte_swp_mkuffd(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD));
}
-static inline int pte_swp_uffd_wp(pte_t pte)
+static inline int pte_swp_uffd(pte_t pte)
{
return !!(pte_val(pte) & PTE_SWP_UFFD);
}
-static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_swp_clear_uffd(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD));
}
@@ -590,13 +590,13 @@ static inline int pmd_protnone(pmd_t pmd)
#define pmd_mkvalid_k(pmd) pte_pmd(pte_mkvalid_k(pmd_pte(pmd)))
#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd))
-#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
-#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
-#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd))
-#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
-#define pmd_swp_clear_uffd_wp(pmd) \
- pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
+#define pmd_uffd(pmd) pte_uffd(pmd_pte(pmd))
+#define pmd_mkuffd(pmd) pte_pmd(pte_mkuffd(pmd_pte(pmd)))
+#define pmd_clear_uffd(pmd) pte_pmd(pte_clear_uffd(pmd_pte(pmd)))
+#define pmd_swp_uffd(pmd) pte_swp_uffd(pmd_pte(pmd))
+#define pmd_swp_mkuffd(pmd) pte_pmd(pte_swp_mkuffd(pmd_pte(pmd)))
+#define pmd_swp_clear_uffd(pmd) \
+ pte_pmd(pte_swp_clear_uffd(pmd_pte(pmd)))
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
@@ -1512,7 +1512,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
* bits 2: remember PG_anon_exclusive
- * bit 3: remember uffd-wp state
+ * bit 3: remember uffd state
* bits 6-10: swap type
* bit 11: PTE_PRESENT_INVALID (must be zero)
* bits 12-61: swap offset
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ca69948b3ed8..b111e134795e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -400,35 +400,35 @@ static inline pte_t pte_wrprotect(pte_t pte)
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-#define pgtable_supports_uffd_wp() \
+#define pgtable_supports_uffd() \
riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)
-static inline bool pte_uffd_wp(pte_t pte)
+static inline bool pte_uffd(pte_t pte)
{
return !!(pte_val(pte) & _PAGE_UFFD);
}
-static inline pte_t pte_mkuffd_wp(pte_t pte)
+static inline pte_t pte_mkuffd(pte_t pte)
{
return pte_wrprotect(__pte(pte_val(pte) | _PAGE_UFFD));
}
-static inline pte_t pte_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_clear_uffd(pte_t pte)
{
return __pte(pte_val(pte) & ~(_PAGE_UFFD));
}
-static inline bool pte_swp_uffd_wp(pte_t pte)
+static inline bool pte_swp_uffd(pte_t pte)
{
return !!(pte_val(pte) & _PAGE_SWP_UFFD);
}
-static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+static inline pte_t pte_swp_mkuffd(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SWP_UFFD);
}
-static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_swp_clear_uffd(pte_t pte)
{
return __pte(pte_val(pte) & ~(_PAGE_SWP_UFFD));
}
@@ -886,34 +886,34 @@ static inline pud_t pud_mkspecial(pud_t pud)
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline bool pmd_uffd_wp(pmd_t pmd)
+static inline bool pmd_uffd(pmd_t pmd)
{
- return pte_uffd_wp(pmd_pte(pmd));
+ return pte_uffd(pmd_pte(pmd));
}
-static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+static inline pmd_t pmd_mkuffd(pmd_t pmd)
{
- return pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)));
+ return pte_pmd(pte_mkuffd(pmd_pte(pmd)));
}
-static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+static inline pmd_t pmd_clear_uffd(pmd_t pmd)
{
- return pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)));
+ return pte_pmd(pte_clear_uffd(pmd_pte(pmd)));
}
-static inline bool pmd_swp_uffd_wp(pmd_t pmd)
+static inline bool pmd_swp_uffd(pmd_t pmd)
{
- return pte_swp_uffd_wp(pmd_pte(pmd));
+ return pte_swp_uffd(pmd_pte(pmd));
}
-static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_mkuffd(pmd_t pmd)
{
- return pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)));
+ return pte_pmd(pte_swp_mkuffd(pmd_pte(pmd)));
}
-static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_clear_uffd(pmd_t pmd)
{
- return pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)));
+ return pte_pmd(pte_swp_clear_uffd(pmd_pte(pmd)));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 6983e52eaf81..cf8a176ff3d8 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -77,20 +77,20 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
-#define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
-static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
+#define __HAVE_ARCH_HUGE_PTE_MKUFFD
+static inline pte_t huge_pte_mkuffd(pte_t pte)
{
return pte;
}
-#define __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP
-static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
+#define __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD
+static inline pte_t huge_pte_clear_uffd(pte_t pte)
{
return pte;
}
-#define __HAVE_ARCH_HUGE_PTE_UFFD_WP
-static inline int huge_pte_uffd_wp(pte_t pte)
+#define __HAVE_ARCH_HUGE_PTE_UFFD
+static inline int huge_pte_uffd(pte_t pte)
{
return 0;
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 038c806b50a2..d14c84b2a332 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -411,17 +411,17 @@ static inline pte_t pte_wrprotect(pte_t pte)
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline int pte_uffd_wp(pte_t pte)
+static inline int pte_uffd(pte_t pte)
{
return pte_flags(pte) & _PAGE_UFFD;
}
-static inline pte_t pte_mkuffd_wp(pte_t pte)
+static inline pte_t pte_mkuffd(pte_t pte)
{
return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD));
}
-static inline pte_t pte_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_clear_uffd(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_UFFD);
}
@@ -526,17 +526,17 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline int pmd_uffd_wp(pmd_t pmd)
+static inline int pmd_uffd(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_UFFD;
}
-static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+static inline pmd_t pmd_mkuffd(pmd_t pmd)
{
return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD));
}
-static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+static inline pmd_t pmd_clear_uffd(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_UFFD);
}
@@ -1548,32 +1548,32 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+static inline pte_t pte_swp_mkuffd(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SWP_UFFD);
}
-static inline int pte_swp_uffd_wp(pte_t pte)
+static inline int pte_swp_uffd(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_UFFD;
}
-static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+static inline pte_t pte_swp_clear_uffd(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SWP_UFFD);
}
-static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_mkuffd(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_SWP_UFFD);
}
-static inline int pmd_swp_uffd_wp(pmd_t pmd)
+static inline int pmd_swp_uffd(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_SWP_UFFD;
}
-static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_clear_uffd(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_SWP_UFFD);
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1e3a15bf46f4..cbd164f4928f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2035,14 +2035,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
- if (pte_uffd_wp(pte))
+ if (pte_uffd(pte))
flags |= PM_UFFD_WP;
} else {
softleaf_t entry;
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
- if (pte_swp_uffd_wp(pte))
+ if (pte_swp_uffd(pte))
flags |= PM_UFFD_WP;
entry = softleaf_from_pte(pte);
if (pm->show_pfn) {
@@ -2108,7 +2108,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
- if (pmd_uffd_wp(pmd))
+ if (pmd_uffd(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
frame = pmd_pfn(pmd) + idx;
@@ -2127,7 +2127,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
- if (pmd_swp_uffd_wp(pmd))
+ if (pmd_swp_uffd(pmd))
flags |= PM_UFFD_WP;
VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd));
page = softleaf_to_page(entry);
@@ -2233,14 +2233,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
!hugetlb_pmd_shared(ptep))
flags |= PM_MMAP_EXCLUSIVE;
- if (huge_pte_uffd_wp(pte))
+ if (huge_pte_uffd(pte))
flags |= PM_UFFD_WP;
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pte_pfn(pte) +
((addr & ~hmask) >> PAGE_SHIFT);
- } else if (pte_swp_uffd_wp_any(pte)) {
+ } else if (pte_swp_uffd_any(pte)) {
flags |= PM_UFFD_WP;
}
@@ -2441,7 +2441,7 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories = PAGE_IS_PRESENT;
- if (!pte_uffd_wp(pte))
+ if (!pte_uffd(pte))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
@@ -2459,7 +2459,7 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories = PAGE_IS_SWAPPED;
- if (!pte_swp_uffd_wp_any(pte))
+ if (!pte_swp_uffd_any(pte))
categories |= PAGE_IS_WRITTEN;
entry = softleaf_from_pte(pte);
@@ -2484,13 +2484,13 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
pte_t old_pte;
old_pte = ptep_modify_prot_start(vma, addr, pte);
- ptent = pte_mkuffd_wp(old_pte);
+ ptent = pte_mkuffd(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
} else {
- ptent = pte_swp_mkuffd_wp(ptent);
+ ptent = pte_swp_mkuffd(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
@@ -2509,7 +2509,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
struct page *page;
categories |= PAGE_IS_PRESENT;
- if (!pmd_uffd_wp(pmd))
+ if (!pmd_uffd(pmd))
categories |= PAGE_IS_WRITTEN;
if (p->masks_of_interest & PAGE_IS_FILE) {
@@ -2524,7 +2524,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_SOFT_DIRTY;
} else {
categories |= PAGE_IS_SWAPPED;
- if (!pmd_swp_uffd_wp(pmd))
+ if (!pmd_swp_uffd(pmd))
categories |= PAGE_IS_WRITTEN;
if (pmd_swp_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
@@ -2548,10 +2548,10 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma,
if (pmd_present(pmd)) {
old = pmdp_invalidate_ad(vma, addr, pmdp);
- pmd = pmd_mkuffd_wp(old);
+ pmd = pmd_mkuffd(old);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (pmd_is_migration_entry(pmd)) {
- pmd = pmd_swp_mkuffd_wp(pmd);
+ pmd = pmd_swp_mkuffd(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
}
@@ -2573,7 +2573,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
- if (!huge_pte_uffd_wp(pte))
+ if (!huge_pte_uffd(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
categories |= PAGE_IS_FILE;
@@ -2584,7 +2584,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
} else {
categories |= PAGE_IS_SWAPPED;
- if (!pte_swp_uffd_wp_any(pte))
+ if (!pte_swp_uffd_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
@@ -2612,10 +2612,10 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
if (softleaf_is_migration(entry))
set_huge_pte_at(vma->vm_mm, addr, ptep,
- pte_swp_mkuffd_wp(ptent), psize);
+ pte_swp_mkuffd(ptent), psize);
else
huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
- huge_pte_mkuffd_wp(ptent));
+ huge_pte_mkuffd(ptent));
}
#endif /* CONFIG_HUGETLB_PAGE */
@@ -2846,8 +2846,8 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
pte_t ptent = ptep_get(pte);
- if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
- pte_swp_uffd_wp_any(ptent))
+ if ((pte_present(ptent) && pte_uffd(ptent)) ||
+ pte_swp_uffd_any(ptent))
continue;
make_uffd_wp_pte(vma, addr, pte, ptent);
if (!flush_end)
@@ -2864,8 +2864,8 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
unsigned long next = addr + PAGE_SIZE;
pte_t ptent = ptep_get(pte);
- if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
- pte_swp_uffd_wp_any(ptent))
+ if ((pte_present(ptent) && pte_uffd(ptent)) ||
+ pte_swp_uffd_any(ptent))
continue;
ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN,
p, addr, &next);
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index e1a2e1b7c8e7..635c41cc3479 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -37,24 +37,24 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
return pte_modify(pte, newprot);
}
-#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
-static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
+#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD
+static inline pte_t huge_pte_mkuffd(pte_t pte)
{
- return huge_pte_wrprotect(pte_mkuffd_wp(pte));
+ return huge_pte_wrprotect(pte_mkuffd(pte));
}
#endif
-#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP
-static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
+#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD
+static inline pte_t huge_pte_clear_uffd(pte_t pte)
{
- return pte_clear_uffd_wp(pte);
+ return pte_clear_uffd(pte);
}
#endif
-#ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP
-static inline int huge_pte_uffd_wp(pte_t pte)
+#ifndef __HAVE_ARCH_HUGE_PTE_UFFD
+static inline int huge_pte_uffd(pte_t pte)
{
- return pte_uffd_wp(pte);
+ return pte_uffd(pte);
}
#endif
diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h
index 0d85791efdf7..30e88fc1de2f 100644
--- a/include/asm-generic/pgtable_uffd.h
+++ b/include/asm-generic/pgtable_uffd.h
@@ -2,79 +2,79 @@
#define _ASM_GENERIC_PGTABLE_UFFD_H
/*
- * Some platforms can customize the uffd-wp bit, making it unavailable
+ * Some platforms can customize the uffd PTE bit, making it unavailable
* even if the architecture provides the resource.
* Adding this API allows architectures to add their own checks for the
* devices on which the kernel is running.
* Note: When overriding it, please make sure the
* CONFIG_HAVE_ARCH_USERFAULTFD_WP is part of this macro.
*/
-#ifndef pgtable_supports_uffd_wp
-#define pgtable_supports_uffd_wp() IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP)
+#ifndef pgtable_supports_uffd
+#define pgtable_supports_uffd() IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP)
#endif
static inline bool uffd_supports_wp_marker(void)
{
- return pgtable_supports_uffd_wp() && IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP);
+ return pgtable_supports_uffd() && IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP);
}
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
-static __always_inline int pte_uffd_wp(pte_t pte)
+static __always_inline int pte_uffd(pte_t pte)
{
return 0;
}
-static __always_inline int pmd_uffd_wp(pmd_t pmd)
+static __always_inline int pmd_uffd(pmd_t pmd)
{
return 0;
}
-static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
+static __always_inline pte_t pte_mkuffd(pte_t pte)
{
return pte;
}
-static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+static __always_inline pmd_t pmd_mkuffd(pmd_t pmd)
{
return pmd;
}
-static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
+static __always_inline pte_t pte_clear_uffd(pte_t pte)
{
return pte;
}
-static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+static __always_inline pmd_t pmd_clear_uffd(pmd_t pmd)
{
return pmd;
}
-static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+static __always_inline pte_t pte_swp_mkuffd(pte_t pte)
{
return pte;
}
-static __always_inline int pte_swp_uffd_wp(pte_t pte)
+static __always_inline int pte_swp_uffd(pte_t pte)
{
return 0;
}
-static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+static __always_inline pte_t pte_swp_clear_uffd(pte_t pte)
{
return pte;
}
-static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_mkuffd(pmd_t pmd)
{
return pmd;
}
-static inline int pmd_swp_uffd_wp(pmd_t pmd)
+static inline int pmd_swp_uffd(pmd_t pmd)
{
return 0;
}
-static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+static inline pmd_t pmd_swp_clear_uffd(pmd_t pmd)
{
return pmd;
}
diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index 992cd8bd8ed0..2ce2f37ac883 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -100,8 +100,8 @@ static inline softleaf_t softleaf_from_pmd(pmd_t pmd)
if (pmd_swp_soft_dirty(pmd))
pmd = pmd_swp_clear_soft_dirty(pmd);
- if (pmd_swp_uffd_wp(pmd))
- pmd = pmd_swp_clear_uffd_wp(pmd);
+ if (pmd_swp_uffd(pmd))
+ pmd = pmd_swp_clear_uffd(pmd);
arch_entry = __pmd_to_swp_entry(pmd);
/* Temporary until swp_entry_t eliminated. */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index a171070e15f0..2811caf4188d 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -600,14 +600,14 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
return false;
/* A uffd-wp wr-protected normal pte */
- if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval)))
+ if (unlikely(pte_present(pteval) && pte_uffd(pteval)))
arm_uffd_pte = true;
/*
* A uffd-wp wr-protected swap pte. Note: this should even cover an
* existing pte marker with uffd-wp bit set.
*/
- if (unlikely(pte_swp_uffd_wp_any(pteval)))
+ if (unlikely(pte_swp_uffd_any(pteval)))
arm_uffd_pte = true;
if (unlikely(arm_uffd_pte)) {
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 8cfc966eae48..15c6440e38dd 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -73,8 +73,8 @@ static inline pte_t pte_swp_clear_flags(pte_t pte)
pte = pte_swp_clear_exclusive(pte);
if (pte_swp_soft_dirty(pte))
pte = pte_swp_clear_soft_dirty(pte);
- if (pte_swp_uffd_wp(pte))
- pte = pte_swp_clear_uffd_wp(pte);
+ if (pte_swp_uffd(pte))
+ pte = pte_swp_clear_uffd(pte);
return pte;
}
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 3ec8e1071673..f4cf5763f92c 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -211,13 +211,13 @@ static inline bool userfaultfd_minor(struct vm_area_struct *vma)
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
- return userfaultfd_wp(vma) && pte_uffd_wp(pte);
+ return userfaultfd_wp(vma) && pte_uffd(pte);
}
static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
pmd_t pmd)
{
- return userfaultfd_wp(vma) && pmd_uffd_wp(pmd);
+ return userfaultfd_wp(vma) && pmd_uffd(pmd);
}
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
@@ -272,10 +272,10 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
}
/*
- * Returns true if this is a swap pte and was uffd-wp wr-protected in either
- * forms (pte marker or a normal swap pte), false otherwise.
+ * Returns true if this swap pte carries uffd-tracked state in either
+ * form (pte marker or a normal swap pte), false otherwise.
*/
-static inline bool pte_swp_uffd_wp_any(pte_t pte)
+static inline bool pte_swp_uffd_any(pte_t pte)
{
if (!uffd_supports_wp_marker())
return false;
@@ -283,7 +283,7 @@ static inline bool pte_swp_uffd_wp_any(pte_t pte)
if (pte_present(pte))
return false;
- if (pte_swp_uffd_wp(pte))
+ if (pte_swp_uffd(pte))
return true;
if (pte_is_uffd_wp_marker(pte))
@@ -424,7 +424,7 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
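- * Returns true if this is a swap pte and was uffd-wp wr-protected in either
- * forms (pte marker or a normal swap pte), false otherwise.
+ * Returns true if this swap pte carries uffd-tracked state in either
+ * form (pte marker or a normal swap pte), false otherwise.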
*/
-static inline bool pte_swp_uffd_wp_any(pte_t pte)
+static inline bool pte_swp_uffd_any(pte_t pte)
{
return false;
}
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 291fae364c62..5a48c5406cce 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -16,7 +16,7 @@
EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \
EM( SCAN_EXCEED_SHARED_PTE, "exceed_shared_pte") \
EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \
- EM( SCAN_PTE_UFFD_WP, "pte_uffd_wp") \
+ EM( SCAN_PTE_UFFD, "pte_uffd_wp") \
EM( SCAN_PTE_MAPPED_HUGEPAGE, "pte_mapped_hugepage") \
EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \
EM( SCAN_PAGE_NULL, "page_null") \
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 42b86e8ab7c0..6017c73c92a0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1909,8 +1909,8 @@ static void copy_huge_non_present_pmd(
pmd = swp_entry_to_pmd(entry);
if (pmd_swp_soft_dirty(*src_pmd))
pmd = pmd_swp_mksoft_dirty(pmd);
- if (pmd_swp_uffd_wp(*src_pmd))
- pmd = pmd_swp_mkuffd_wp(pmd);
+ if (pmd_swp_uffd(*src_pmd))
+ pmd = pmd_swp_mkuffd(pmd);
set_pmd_at(src_mm, addr, src_pmd, pmd);
} else if (softleaf_is_device_private(entry)) {
/*
@@ -1923,8 +1923,8 @@ static void copy_huge_non_present_pmd(
if (pmd_swp_soft_dirty(*src_pmd))
pmd = pmd_swp_mksoft_dirty(pmd);
- if (pmd_swp_uffd_wp(*src_pmd))
- pmd = pmd_swp_mkuffd_wp(pmd);
+ if (pmd_swp_uffd(*src_pmd))
+ pmd = pmd_swp_mkuffd(pmd);
set_pmd_at(src_mm, addr, src_pmd, pmd);
}
@@ -1944,7 +1944,7 @@ static void copy_huge_non_present_pmd(
mm_inc_nr_ptes(dst_mm);
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
if (!userfaultfd_wp(dst_vma))
- pmd = pmd_swp_clear_uffd_wp(pmd);
+ pmd = pmd_swp_clear_uffd(pmd);
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
}
@@ -2040,7 +2040,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
pmdp_set_wrprotect(src_mm, addr, src_pmd);
if (!userfaultfd_wp(dst_vma))
- pmd = pmd_clear_uffd_wp(pmd);
+ pmd = pmd_clear_uffd(pmd);
pmd = pmd_wrprotect(pmd);
set_pmd:
pmd = pmd_mkold(pmd);
@@ -2581,9 +2581,9 @@ static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
if (pmd_none(pmd))
return pmd;
if (pmd_present(pmd))
- pmd = pmd_clear_uffd_wp(pmd);
+ pmd = pmd_clear_uffd(pmd);
else
- pmd = pmd_swp_clear_uffd_wp(pmd);
+ pmd = pmd_swp_clear_uffd(pmd);
return pmd;
}
@@ -2668,9 +2668,9 @@ static void change_non_present_huge_pmd(struct mm_struct *mm,
}
if (uffd_wp)
- newpmd = pmd_swp_mkuffd_wp(newpmd);
+ newpmd = pmd_swp_mkuffd(newpmd);
else if (uffd_wp_resolve)
- newpmd = pmd_swp_clear_uffd_wp(newpmd);
+ newpmd = pmd_swp_clear_uffd(newpmd);
if (!pmd_same(*pmd, newpmd))
set_pmd_at(mm, addr, pmd, newpmd);
}
@@ -2751,14 +2751,14 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
entry = pmd_modify(oldpmd, newprot);
if (uffd_wp)
- entry = pmd_mkuffd_wp(entry);
+ entry = pmd_mkuffd(entry);
else if (uffd_wp_resolve)
/*
* Leave the write bit to be handled by PF interrupt
* handler, then things like COW could be properly
* handled.
*/
- entry = pmd_clear_uffd_wp(entry);
+ entry = pmd_clear_uffd(entry);
/* See change_pte_range(). */
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
@@ -3101,8 +3101,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
entry = pfn_pte(zero_pfn(addr), vma->vm_page_prot);
entry = pte_mkspecial(entry);
- if (pmd_uffd_wp(old_pmd))
- entry = pte_mkuffd_wp(entry);
+ if (pmd_uffd(old_pmd))
+ entry = pte_mkuffd(entry);
VM_BUG_ON(!pte_none(ptep_get(pte)));
set_pte_at(mm, addr, pte, entry);
pte++;
@@ -3186,7 +3186,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
folio = page_folio(page);
soft_dirty = pmd_swp_soft_dirty(old_pmd);
- uffd_wp = pmd_swp_uffd_wp(old_pmd);
+ uffd_wp = pmd_swp_uffd(old_pmd);
write = softleaf_is_migration_write(entry);
if (PageAnon(page))
@@ -3202,7 +3202,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
folio = page_folio(page);
soft_dirty = pmd_swp_soft_dirty(old_pmd);
- uffd_wp = pmd_swp_uffd_wp(old_pmd);
+ uffd_wp = pmd_swp_uffd(old_pmd);
write = softleaf_is_device_private_write(entry);
anon_exclusive = PageAnonExclusive(page);
@@ -3259,7 +3259,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
write = pmd_write(old_pmd);
young = pmd_young(old_pmd);
soft_dirty = pmd_soft_dirty(old_pmd);
- uffd_wp = pmd_uffd_wp(old_pmd);
+ uffd_wp = pmd_uffd(old_pmd);
VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
@@ -3330,7 +3330,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (soft_dirty)
entry = pte_swp_mksoft_dirty(entry);
if (uffd_wp)
- entry = pte_swp_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd(entry);
VM_WARN_ON(!pte_none(ptep_get(pte + i)));
set_pte_at(mm, addr, pte + i, entry);
}
@@ -3357,7 +3357,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (soft_dirty)
entry = pte_swp_mksoft_dirty(entry);
if (uffd_wp)
- entry = pte_swp_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd(entry);
VM_WARN_ON(!pte_none(ptep_get(pte + i)));
set_pte_at(mm, addr, pte + i, entry);
}
@@ -3375,7 +3375,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)
- entry = pte_mkuffd_wp(entry);
+ entry = pte_mkuffd(entry);
for (i = 0; i < HPAGE_PMD_NR; i++)
VM_WARN_ON(!pte_none(ptep_get(pte + i)));
@@ -5016,8 +5016,8 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
pmdswp = swp_entry_to_pmd(entry);
if (pmd_soft_dirty(pmdval))
pmdswp = pmd_swp_mksoft_dirty(pmdswp);
- if (pmd_uffd_wp(pmdval))
- pmdswp = pmd_swp_mkuffd_wp(pmdswp);
+ if (pmd_uffd(pmdval))
+ pmdswp = pmd_swp_mkuffd(pmdswp);
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
folio_remove_rmap_pmd(folio, page, vma);
folio_put(folio);
@@ -5047,8 +5047,8 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
pmde = pmd_mksoft_dirty(pmde);
if (softleaf_is_migration_write(entry))
pmde = pmd_mkwrite(pmde, vma);
- if (pmd_swp_uffd_wp(*pvmw->pmd))
- pmde = pmd_mkuffd_wp(pmde);
+ if (pmd_swp_uffd(*pvmw->pmd))
+ pmde = pmd_mkuffd(pmde);
if (!softleaf_is_migration_young(entry))
pmde = pmd_mkold(pmde);
/* NOTE: this may contain setting soft-dirty on some archs */
@@ -5068,8 +5068,8 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_swp_mksoft_dirty(pmde);
- if (pmd_swp_uffd_wp(*pvmw->pmd))
- pmde = pmd_swp_mkuffd_wp(pmde);
+ if (pmd_swp_uffd(*pvmw->pmd))
+ pmde = pmd_swp_mkuffd(pmde);
}
if (folio_test_anon(folio)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 571212b80835..d0c81a056ae2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4843,8 +4843,8 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
__folio_mark_uptodate(new_folio);
hugetlb_add_new_anon_rmap(new_folio, vma, addr);
- if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
- newpte = huge_pte_mkuffd_wp(newpte);
+ if (userfaultfd_wp(vma) && huge_pte_uffd(old))
+ newpte = huge_pte_mkuffd(newpte);
set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
@@ -4918,10 +4918,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
softleaf = softleaf_from_pte(entry);
if (unlikely(softleaf_is_hwpoison(softleaf))) {
if (!userfaultfd_wp(dst_vma))
- entry = huge_pte_clear_uffd_wp(entry);
+ entry = huge_pte_clear_uffd(entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(softleaf_is_migration(softleaf))) {
- bool uffd_wp = pte_swp_uffd_wp(entry);
+ bool uffd = pte_swp_uffd(entry);
if (!softleaf_is_migration_read(softleaf) && cow) {
/*
@@ -4931,12 +4931,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
softleaf = make_readable_migration_entry(
swp_offset(softleaf));
entry = swp_entry_to_pte(softleaf);
- if (userfaultfd_wp(src_vma) && uffd_wp)
- entry = pte_swp_mkuffd_wp(entry);
+ if (userfaultfd_wp(src_vma) && uffd)
+ entry = pte_swp_mkuffd(entry);
set_huge_pte_at(src, addr, src_pte, entry, sz);
}
if (!userfaultfd_wp(dst_vma))
- entry = huge_pte_clear_uffd_wp(entry);
+ entry = huge_pte_clear_uffd(entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(pte_is_marker(entry))) {
const pte_marker marker = copy_pte_marker(softleaf, dst_vma);
@@ -5013,7 +5013,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
}
if (!userfaultfd_wp(dst_vma))
- entry = huge_pte_clear_uffd_wp(entry);
+ entry = huge_pte_clear_uffd(entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
hugetlb_count_add(npages, dst);
@@ -5061,9 +5061,9 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
} else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
- pte = huge_pte_clear_uffd_wp(pte);
+ pte = huge_pte_clear_uffd(pte);
else
- pte = pte_swp_clear_uffd_wp(pte);
+ pte = pte_swp_clear_uffd(pte);
}
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
}
@@ -5197,7 +5197,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
* drop the uffd-wp bit in this zap, then replace the
* pte with a marker.
*/
- if (pte_swp_uffd_wp_any(pte) &&
+ if (pte_swp_uffd_any(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP),
@@ -5233,7 +5233,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (huge_pte_dirty(pte))
folio_mark_dirty(folio);
/* Leave a uffd-wp pte marker if needed */
- if (huge_pte_uffd_wp(pte) &&
+ if (huge_pte_uffd(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP),
@@ -5437,7 +5437,7 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
* can trigger this, because hugetlb_fault() will always resolve
* uffd-wp bit first.
*/
- if (!unshare && huge_pte_uffd_wp(pte))
+ if (!unshare && huge_pte_uffd(pte))
return 0;
/* Let's take out MAP_SHARED mappings first. */
@@ -5581,8 +5581,8 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
hugetlb_remove_rmap(old_folio);
hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address);
- if (huge_pte_uffd_wp(pte))
- newpte = huge_pte_mkuffd_wp(newpte);
+ if (huge_pte_uffd(pte))
+ newpte = huge_pte_mkuffd(newpte);
set_huge_pte_at(mm, vmf->address, vmf->pte, newpte,
huge_page_size(h));
folio_set_hugetlb_migratable(new_folio);
@@ -5860,7 +5860,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
* if populated.
*/
if (unlikely(pte_is_uffd_wp_marker(vmf->orig_pte)))
- new_pte = huge_pte_mkuffd_wp(new_pte);
+ new_pte = huge_pte_mkuffd(new_pte);
set_huge_pte_at(mm, vmf->address, vmf->pte, new_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), mm);
@@ -6058,7 +6058,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_ptl;
/* Handle userfault-wp first, before trying to lock more pages */
- if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(mm, vmf.address, vmf.pte)) &&
+ if (userfaultfd_wp(vma) && huge_pte_uffd(huge_ptep_get(mm, vmf.address, vmf.pte)) &&
(flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
if (!userfaultfd_wp_async(vma)) {
spin_unlock(vmf.ptl);
@@ -6067,7 +6067,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_userfault(&vmf, VM_UFFD_WP);
}
- vmf.orig_pte = huge_pte_clear_uffd_wp(vmf.orig_pte);
+ vmf.orig_pte = huge_pte_clear_uffd(vmf.orig_pte);
set_huge_pte_at(mm, vmf.address, vmf.pte, vmf.orig_pte,
huge_page_size(hstate_vma(vma)));
/* Fallthrough to CoW */
@@ -6352,7 +6352,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
_dst_pte = pte_mkyoung(_dst_pte);
if (wp_enabled)
- _dst_pte = huge_pte_mkuffd_wp(_dst_pte);
+ _dst_pte = huge_pte_mkuffd(_dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, size);
@@ -6476,9 +6476,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
}
if (uffd_wp)
- newpte = pte_swp_mkuffd_wp(newpte);
+ newpte = pte_swp_mkuffd(newpte);
else if (uffd_wp_resolve)
- newpte = pte_swp_clear_uffd_wp(newpte);
+ newpte = pte_swp_clear_uffd(newpte);
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(pte_is_marker(pte))) {
@@ -6499,9 +6499,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte = huge_pte_modify(old_pte, newprot);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (uffd_wp)
- pte = huge_pte_mkuffd_wp(pte);
+ pte = huge_pte_mkuffd(pte);
else if (uffd_wp_resolve)
- pte = huge_pte_clear_uffd_wp(pte);
+ pte = huge_pte_clear_uffd(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
tlb_remove_huge_tlb_entry(h, &tlb, ptep, address);
diff --git a/mm/internal.h b/mm/internal.h
index 5602393054f3..9325eefbea6a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -412,8 +412,8 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
new = pte_swp_mksoft_dirty(new);
if (pte_swp_exclusive(pte))
new = pte_swp_mkexclusive(new);
- if (pte_swp_uffd_wp(pte))
- new = pte_swp_mkuffd_wp(new);
+ if (pte_swp_uffd(pte))
+ new = pte_swp_mkuffd(new);
return new;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4549a020bf73..afa218be15de 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -37,7 +37,7 @@ enum scan_result {
SCAN_EXCEED_SWAP_PTE,
SCAN_EXCEED_SHARED_PTE,
SCAN_PTE_NON_PRESENT,
- SCAN_PTE_UFFD_WP,
+ SCAN_PTE_UFFD,
SCAN_PTE_MAPPED_HUGEPAGE,
SCAN_LACK_REFERENCED_PAGE,
SCAN_PAGE_NULL,
@@ -712,8 +712,8 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_PTE_NON_PRESENT;
goto out;
}
- if (pte_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ if (pte_uffd(pteval)) {
+ result = SCAN_PTE_UFFD;
goto out;
}
page = vm_normal_page(vma, addr, pteval);
@@ -1566,7 +1566,7 @@ static int mthp_collapse(struct mm_struct *mm, struct vm_area_struct *vma,
case SCAN_PAGE_NULL:
case SCAN_DEL_PAGE_LRU:
case SCAN_PTE_NON_PRESENT:
- case SCAN_PTE_UFFD_WP:
+ case SCAN_PTE_UFFD:
case SCAN_ALLOC_HUGE_PAGE_FAIL:
case SCAN_PAGE_LAZYFREE:
goto next_order;
@@ -1666,15 +1666,15 @@ static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
/*
* Always be strict with uffd-wp
* enabled swap entries. Please see
- * comment below for pte_uffd_wp().
+ * comment below for pte_uffd().
*/
- if (pte_swp_uffd_wp_any(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ if (pte_swp_uffd_any(pteval)) {
+ result = SCAN_PTE_UFFD;
goto out_unmap;
}
continue;
}
- if (pte_uffd_wp(pteval)) {
+ if (pte_uffd(pteval)) {
/*
* Don't collapse the page if any of the small
* PTEs are armed with uffd write protection.
@@ -1684,7 +1684,7 @@ static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
* userfault messages that falls outside of
* the registered range. So, just be simple.
*/
- result = SCAN_PTE_UFFD_WP;
+ result = SCAN_PTE_UFFD;
goto out_unmap;
}
@@ -1897,7 +1897,7 @@ static enum scan_result try_collapse_pte_mapped_thp(struct mm_struct *mm, unsign
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
if (userfaultfd_wp(vma))
- return SCAN_PTE_UFFD_WP;
+ return SCAN_PTE_UFFD;
folio = filemap_lock_folio(vma->vm_file->f_mapping,
linear_page_index(vma, haddr));
@@ -3244,7 +3244,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
/* Whitelisted set of results where continuing OK */
case SCAN_NO_PTE_TABLE:
case SCAN_PTE_NON_PRESENT:
- case SCAN_PTE_UFFD_WP:
+ case SCAN_PTE_UFFD:
case SCAN_LACK_REFERENCED_PAGE:
case SCAN_PAGE_NULL:
case SCAN_PAGE_COUNT:
diff --git a/mm/memory.c b/mm/memory.c
index 7c020995eafc..c4fd5cb4a08f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -893,8 +893,8 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
- if (pte_swp_uffd_wp(orig_pte))
- pte = pte_mkuffd_wp(pte);
+ if (pte_swp_uffd(orig_pte))
+ pte = pte_mkuffd(pte);
if ((vma->vm_flags & VM_WRITE) &&
can_change_pte_writable(vma, address, pte)) {
@@ -984,8 +984,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte = softleaf_to_pte(entry);
if (pte_swp_soft_dirty(orig_pte))
pte = pte_swp_mksoft_dirty(pte);
- if (pte_swp_uffd_wp(orig_pte))
- pte = pte_swp_mkuffd_wp(pte);
+ if (pte_swp_uffd(orig_pte))
+ pte = pte_swp_mkuffd(pte);
set_pte_at(src_mm, addr, src_pte, pte);
}
} else if (softleaf_is_device_private(entry)) {
@@ -1018,8 +1018,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
entry = make_readable_device_private_entry(
swp_offset(entry));
pte = swp_entry_to_pte(entry);
- if (pte_swp_uffd_wp(orig_pte))
- pte = pte_swp_mkuffd_wp(pte);
+ if (pte_swp_uffd(orig_pte))
+ pte = pte_swp_mkuffd(pte);
set_pte_at(src_mm, addr, src_pte, pte);
}
} else if (softleaf_is_device_exclusive(entry)) {
@@ -1042,7 +1042,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return 0;
}
if (!userfaultfd_wp(dst_vma))
- pte = pte_swp_clear_uffd_wp(pte);
+ pte = pte_swp_clear_uffd(pte);
set_pte_at(dst_mm, addr, dst_pte, pte);
return 0;
}
@@ -1090,7 +1090,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma);
if (userfaultfd_pte_wp(dst_vma, ptep_get(src_pte)))
/* Uffd-wp needs to be delivered to dest pte as well */
- pte = pte_mkuffd_wp(pte);
+ pte = pte_mkuffd(pte);
set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
return 0;
}
@@ -1113,7 +1113,7 @@ static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma,
pte = pte_mkold(pte);
if (!userfaultfd_wp(dst_vma))
- pte = pte_clear_uffd_wp(pte);
+ pte = pte_clear_uffd(pte);
set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr);
}
@@ -3925,8 +3925,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
if (unlikely(unshare)) {
if (pte_soft_dirty(vmf->orig_pte))
entry = pte_mksoft_dirty(entry);
- if (pte_uffd_wp(vmf->orig_pte))
- entry = pte_mkuffd_wp(entry);
+ if (pte_uffd(vmf->orig_pte))
+ entry = pte_mkuffd(entry);
} else {
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
}
@@ -4261,7 +4261,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
* etc.) because we're only removing the uffd-wp bit,
* which is completely invisible to the user.
*/
- pte = pte_clear_uffd_wp(ptep_get(vmf->pte));
+ pte = pte_clear_uffd(ptep_get(vmf->pte));
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
/*
@@ -5038,8 +5038,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
pte = mk_pte(page, vma->vm_page_prot);
if (pte_swp_soft_dirty(vmf->orig_pte))
pte = pte_mksoft_dirty(pte);
- if (pte_swp_uffd_wp(vmf->orig_pte))
- pte = pte_mkuffd_wp(pte);
+ if (pte_swp_uffd(vmf->orig_pte))
+ pte = pte_mkuffd(pte);
/*
* Same logic as in do_wp_page(); however, optimize for pages that are
@@ -5255,7 +5255,7 @@ void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry), vma);
if (uffd_wp)
- entry = pte_mkuffd_wp(entry);
+ entry = pte_mkuffd(entry);
folio_ref_add(folio, nr_pages - 1);
folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
@@ -5322,7 +5322,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
return handle_userfault(vmf, VM_UFFD_MISSING);
}
if (vmf_orig_pte_uffd_wp(vmf))
- entry = pte_mkuffd_wp(entry);
+ entry = pte_mkuffd(entry);
set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
/* No need to invalidate - it was non-present before */
@@ -5572,7 +5572,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
else if (pte_write(entry) && folio_test_dirty(folio))
entry = pte_mkdirty(entry);
if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
- entry = pte_mkuffd_wp(entry);
+ entry = pte_mkuffd(entry);
/* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) {
VM_BUG_ON_FOLIO(nr != 1, folio);
diff --git a/mm/migrate.c b/mm/migrate.c
index 0c6a0ab6ecce..4bdb5be7afbf 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -326,8 +326,8 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
if (pte_swp_soft_dirty(old_pte))
newpte = pte_mksoft_dirty(newpte);
- if (pte_swp_uffd_wp(old_pte))
- newpte = pte_mkuffd_wp(newpte);
+ if (pte_swp_uffd(old_pte))
+ newpte = pte_mkuffd(newpte);
set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
@@ -391,8 +391,8 @@ static bool remove_migration_pte(struct folio *folio,
if (softleaf_is_migration_write(entry))
pte = pte_mkwrite(pte, vma);
- else if (pte_swp_uffd_wp(old_pte))
- pte = pte_mkuffd_wp(pte);
+ else if (pte_swp_uffd(old_pte))
+ pte = pte_mkuffd(pte);
if (folio_test_anon(folio) && !softleaf_is_migration_read(entry))
rmap_flags |= RMAP_EXCLUSIVE;
@@ -407,8 +407,8 @@ static bool remove_migration_pte(struct folio *folio,
pte = softleaf_to_pte(entry);
if (pte_swp_soft_dirty(old_pte))
pte = pte_swp_mksoft_dirty(pte);
- if (pte_swp_uffd_wp(old_pte))
- pte = pte_swp_mkuffd_wp(pte);
+ if (pte_swp_uffd(old_pte))
+ pte = pte_swp_mkuffd(pte);
}
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 554754eb26ff..17da1bab0248 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -445,13 +445,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
if (pte_present(pte)) {
if (pte_soft_dirty(pte))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pte))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_uffd(pte))
+ swp_pte = pte_swp_mkuffd(swp_pte);
} else {
if (pte_swp_soft_dirty(pte))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pte))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_swp_uffd(pte))
+ swp_pte = pte_swp_mkuffd(swp_pte);
}
set_pte_at(mm, addr, ptep, swp_pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9cbf932b028c..8340c8b228c6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -240,8 +240,8 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
*/
entry = make_readable_device_private_entry(swp_offset(entry));
newpte = swp_entry_to_pte(entry);
- if (pte_swp_uffd_wp(oldpte))
- newpte = pte_swp_mkuffd_wp(newpte);
+ if (pte_swp_uffd(oldpte))
+ newpte = pte_swp_mkuffd(newpte);
} else if (softleaf_is_marker(entry)) {
/*
* Ignore error swap entries unconditionally,
@@ -266,9 +266,9 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
}
if (uffd_wp)
- newpte = pte_swp_mkuffd_wp(newpte);
+ newpte = pte_swp_mkuffd(newpte);
else if (uffd_wp_resolve)
- newpte = pte_swp_clear_uffd_wp(newpte);
+ newpte = pte_swp_clear_uffd(newpte);
if (!pte_same(oldpte, newpte)) {
set_pte_at(vma->vm_mm, addr, pte, newpte);
@@ -290,9 +290,9 @@ static __always_inline void change_present_ptes(struct mmu_gather *tlb,
ptent = pte_modify(oldpte, newprot);
if (uffd_wp)
- ptent = pte_mkuffd_wp(ptent);
+ ptent = pte_mkuffd(ptent);
else if (uffd_wp_resolve)
- ptent = pte_clear_uffd_wp(ptent);
+ ptent = pte_clear_uffd(ptent);
/*
* In some writable, shared mappings, we might want
diff --git a/mm/mremap.c b/mm/mremap.c
index e9c8b1d05832..12732a5c547e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -297,9 +297,9 @@ static int move_ptes(struct pagetable_move_control *pmc,
else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
- pte = pte_clear_uffd_wp(pte);
+ pte = pte_clear_uffd(pte);
else
- pte = pte_swp_clear_uffd_wp(pte);
+ pte = pte_swp_clear_uffd(pte);
}
set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
}
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 53a8997ec043..3fb995e5d40d 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -188,8 +188,8 @@ static inline bool softleaf_cached_writable(softleaf_t entry)
static void page_table_check_pte_flags(pte_t pte)
{
if (pte_present(pte)) {
- WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
- } else if (pte_swp_uffd_wp(pte)) {
+ WARN_ON_ONCE(pte_uffd(pte) && pte_write(pte));
+ } else if (pte_swp_uffd(pte)) {
const softleaf_t entry = softleaf_from_pte(pte);
WARN_ON_ONCE(softleaf_cached_writable(entry));
@@ -216,9 +216,9 @@ EXPORT_SYMBOL(__page_table_check_ptes_set);
static inline void page_table_check_pmd_flags(pmd_t pmd)
{
if (pmd_present(pmd)) {
- if (pmd_uffd_wp(pmd))
+ if (pmd_uffd(pmd))
WARN_ON_ONCE(pmd_write(pmd));
- } else if (pmd_swp_uffd_wp(pmd)) {
+ } else if (pmd_swp_uffd(pmd)) {
const softleaf_t entry = softleaf_from_pmd(pmd);
WARN_ON_ONCE(softleaf_cached_writable(entry));
diff --git a/mm/rmap.c b/mm/rmap.c
index 1c77d5dc06e9..546bc1cf9391 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2318,13 +2318,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (likely(pte_present(pteval))) {
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_uffd(pteval))
+ swp_pte = pte_swp_mkuffd(swp_pte);
} else {
if (pte_swp_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_swp_uffd(pteval))
+ swp_pte = pte_swp_mkuffd(swp_pte);
}
set_pte_at(mm, address, pvmw.pte, swp_pte);
} else {
@@ -2692,14 +2692,14 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_uffd(pteval))
+ swp_pte = pte_swp_mkuffd(swp_pte);
} else {
swp_pte = swp_entry_to_pte(entry);
if (pte_swp_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_swp_uffd(pteval))
+ swp_pte = pte_swp_mkuffd(swp_pte);
}
if (folio_test_hugetlb(folio))
set_huge_pte_at(mm, address, pvmw.pte, swp_pte,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e3d126602a1e..15fdca2da1f7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2557,8 +2557,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
if (pte_swp_soft_dirty(old_pte))
new_pte = pte_mksoft_dirty(new_pte);
- if (pte_swp_uffd_wp(old_pte))
- new_pte = pte_mkuffd_wp(new_pte);
+ if (pte_swp_uffd(old_pte))
+ new_pte = pte_mkuffd(new_pte);
setpte:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
folio_put_swap(swapcache, folio_file_page(swapcache, swp_offset(entry)));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 35b206cc9aa6..ebce642c8805 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -394,7 +394,7 @@ static int mfill_atomic_install_pte(pmd_t *dst_pmd,
if (writable)
_dst_pte = pte_mkwrite(_dst_pte, dst_vma);
if (flags & MFILL_ATOMIC_WP)
- _dst_pte = pte_mkuffd_wp(_dst_pte);
+ _dst_pte = pte_mkuffd(_dst_pte);
ret = -EAGAIN;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
@@ -3571,7 +3571,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
- if (!pgtable_supports_uffd_wp())
+ if (!pgtable_supports_uffd())
goto out;
vm_flags |= VM_UFFD_WP;
@@ -4281,7 +4281,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &=
~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
#endif
- if (!pgtable_supports_uffd_wp())
+ if (!pgtable_supports_uffd())
uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (!uffd_supports_wp_marker()) {
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 04/14] mm: add VM_UFFD_RWP VMA flag
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (2 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 03/14] mm: rename uffd-wp PTE accessors " Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag Kiryl Shutsemau (Meta)
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Preparatory patch for userfaultfd read-write protection (RWP). RWP
extends userfaultfd protection from plain write-protection (WP) to
full read-write protection: accesses to an RWP-protected range --
reads as well as writes -- trap through userfaultfd.
Reserve VM_UFFD_RWP, add the userfaultfd_rwp() and
userfaultfd_protected() helpers, and wire up the smaps "ur" entry and
the trace-flag table the rest of the series will use. The flag is
gated on CONFIG_USERFAULTFD_RWP, which is introduced together with the
UAPI in a later patch; until then VM_UFFD_RWP aliases VM_NONE and
every downstream check folds to dead code.
Nothing sets the flag yet, so the checks added here have no effect.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
---
Documentation/filesystems/proc.rst | 1 +
fs/proc/task_mmu.c | 3 +++
include/linux/mm.h | 28 ++++++++++++++++---------
include/linux/userfaultfd_k.h | 33 ++++++++++++++++++++++++------
include/trace/events/mmflags.h | 7 +++++++
5 files changed, 56 insertions(+), 16 deletions(-)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index db6167befb7b..db28207c5290 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -607,6 +607,7 @@ encoded manner. The codes are the following:
um userfaultfd missing tracking
uw userfaultfd wr-protect tracking
ui userfaultfd minor fault
+ ur userfaultfd read-write-protect tracking
ss shadow/guarded control stack page
sl sealed
lf lock on fault pages
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cbd164f4928f..5e74dadfb1cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1237,6 +1237,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
[ilog2(VM_UFFD_MINOR)] = "ui",
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+#ifdef CONFIG_USERFAULTFD_RWP
+ [ilog2(VM_UFFD_RWP)] = "ur",
+#endif
#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0f2612a70fb1..3d0a5ac3c717 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -353,6 +353,7 @@ enum {
#endif
DECLARE_VMA_BIT(UFFD_MINOR, 41),
DECLARE_VMA_BIT(SEALED, 42),
+ DECLARE_VMA_BIT(UFFD_RWP, 43),
/* Flags that reuse flags above. */
DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
@@ -496,6 +497,11 @@ enum {
#else
#define VM_UFFD_MINOR VM_NONE
#endif
+#ifdef CONFIG_USERFAULTFD_RWP
+#define VM_UFFD_RWP INIT_VM_FLAG(UFFD_RWP)
+#else
+#define VM_UFFD_RWP VM_NONE
+#endif
#ifdef CONFIG_64BIT
#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
#define VM_SEALED INIT_VM_FLAG(SEALED)
@@ -633,22 +639,24 @@ enum {
* reconsistuted upon page fault, so necessitate page table copying upon fork.
*
* Note that these flags should be compared with the DESTINATION VMA not the
- * source, as VM_UFFD_WP may not be propagated to destination, while all other
- * flags will be.
+ * source: VM_UFFD_WP and VM_UFFD_RWP may be cleared on the destination
+ * (dup_userfaultfd() -> userfaultfd_reset_ctx() when the parent context did
+ * not negotiate UFFD_FEATURE_EVENT_FORK), while all other flags propagate.
*
* VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
* reasonably reconstructed on page fault.
*
* VM_UFFD_WP - Encodes metadata about an installed uffd
- * write protect handler, which cannot be
- * reconstructed on page fault.
+ * VM_UFFD_RWP write- or read-write-protect handler, which
+ * cannot be reconstructed on page fault.
*
- * We always copy pgtables when dst_vma has uffd-wp
- * enabled even if it's file-backed
- * (e.g. shmem). Because when uffd-wp is enabled,
- * pgtable contains uffd-wp protection information,
- * that's something we can't retrieve from page cache,
- * and skip copying will lose those info.
+ * We always copy pgtables when dst_vma has the
+ * uffd PTE bit in use even if it's file-backed
+ * (e.g. shmem). When the uffd bit is in use,
+ * the pgtable holds protection information
+ * that cannot be retrieved from the page
+ * cache, so skipping the copy would lose
+ * that information.
*
* VM_MAYBE_GUARD - Could contain page guard region markers which
* by design are a property of the page tables
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f4cf5763f92c..87a8cebd5938 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -21,10 +21,11 @@
#include <linux/hugetlb_inline.h>
/* The set of all possible UFFD-related VM flags. */
-#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)
+#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_MINOR | \
+ VM_UFFD_WP | VM_UFFD_RWP)
-#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_WP_BIT, \
- VMA_UFFD_MINOR_BIT)
+#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_MINOR_BIT, \
+ VMA_UFFD_WP_BIT, VMA_UFFD_RWP_BIT)
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -178,7 +179,7 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
*/
static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
+ return vma->vm_flags & (VM_UFFD_MINOR | VM_UFFD_WP | VM_UFFD_RWP);
}
/*
@@ -208,6 +209,16 @@ static inline bool userfaultfd_minor(struct vm_area_struct *vma)
return vma->vm_flags & VM_UFFD_MINOR;
}
+static inline bool userfaultfd_rwp(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_UFFD_RWP;
+}
+
+static inline bool userfaultfd_protected(struct vm_area_struct *vma)
+{
+ return userfaultfd_wp(vma) || userfaultfd_rwp(vma);
+}
+
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
@@ -328,6 +339,16 @@ static inline bool userfaultfd_minor(struct vm_area_struct *vma)
return false;
}
+static inline bool userfaultfd_rwp(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline bool userfaultfd_protected(struct vm_area_struct *vma)
+{
+ return false;
+}
+
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
@@ -421,8 +442,8 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
}
/*
- * Returns true if this is a swap pte and was uffd-wp wr-protected in either
- * forms (pte marker or a normal swap pte), false otherwise.
+ * Returns true if this swap pte carries uffd-tracked state in either
+ * form (pte marker or a normal swap pte), false otherwise.
*/
static inline bool pte_swp_uffd_any(pte_t pte)
{
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a6e5a44c9b42..bfface3d0203 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -194,6 +194,12 @@ IF_HAVE_PG_ARCH_3(arch_3)
# define IF_HAVE_UFFD_MINOR(flag, name)
#endif
+#ifdef CONFIG_USERFAULTFD_RWP
+# define IF_HAVE_UFFD_RWP(flag, name) {flag, name},
+#else
+# define IF_HAVE_UFFD_RWP(flag, name)
+#endif
+
#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
# define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
#else
@@ -215,6 +221,7 @@ IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \
{VM_PFNMAP, "pfnmap" }, \
{VM_MAYBE_GUARD, "maybe_guard" }, \
{VM_UFFD_WP, "uffd_wp" }, \
+IF_HAVE_UFFD_RWP(VM_UFFD_RWP, "uffd_rwp" ) \
{VM_LOCKED, "locked" }, \
{VM_IO, "io" }, \
{VM_SEQ_READ, "seqread" }, \
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (3 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 04/14] mm: add VM_UFFD_RWP VMA flag Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 06/14] mm: preserve RWP marker across PTE rewrites Kiryl Shutsemau (Meta)
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Preparatory patch. Add the change_protection() primitive that
userfaultfd RWP will use.
An RWP-protected PTE is PAGE_NONE with the uffd PTE bit set. The
PROT_NONE half makes the CPU fault on any access; the uffd bit
distinguishes an RWP fault from a plain mprotect(PROT_NONE) or NUMA
hinting fault. MM_CP_UFFD_WP and MM_CP_UFFD_RWP share the same PTE
bit, so the two cannot be used together on the same range.
Two new change_protection() flags:
  MM_CP_UFFD_RWP          install PAGE_NONE and set the uffd bit
  MM_CP_UFFD_RWP_RESOLVE  restore vma->vm_page_prot, clear the uffd bit
Both are wired through change_pte_range(), change_huge_pmd(), and
hugetlb_change_protection() so anon, shmem, THP, and hugetlb all
share the same semantics.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/mm.h | 5 ++++
include/linux/userfaultfd_k.h | 1 -
mm/huge_memory.c | 30 +++++++++++++----------
mm/hugetlb.c | 25 ++++++++++++++-----
mm/mprotect.c | 46 +++++++++++++++++++++++++++--------
5 files changed, 77 insertions(+), 30 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d0a5ac3c717..ecbf3e83a892 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3286,6 +3286,11 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
#define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */
#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \
MM_CP_UFFD_WP_RESOLVE)
+/* Whether this change is for uffd RWP */
+#define MM_CP_UFFD_RWP (1UL << 4) /* do rwp */
+#define MM_CP_UFFD_RWP_RESOLVE (1UL << 5) /* resolve rwp */
+#define MM_CP_UFFD_RWP_ALL (MM_CP_UFFD_RWP | \
+ MM_CP_UFFD_RWP_RESOLVE)
bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 87a8cebd5938..16fbe11c0c55 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -361,7 +361,6 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
return false;
}
-
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6017c73c92a0..0d05abb0cd81 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2640,8 +2640,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
}
static void change_non_present_huge_pmd(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmd, bool uffd_wp,
- bool uffd_wp_resolve)
+ unsigned long addr, pmd_t *pmd, bool uffd_prot,
+ bool uffd_prot_resolve)
{
softleaf_t entry = softleaf_from_pmd(*pmd);
const struct folio *folio = softleaf_to_folio(entry);
@@ -2667,9 +2667,9 @@ static void change_non_present_huge_pmd(struct mm_struct *mm,
newpmd = *pmd;
}
- if (uffd_wp)
+ if (uffd_prot)
newpmd = pmd_swp_mkuffd(newpmd);
- else if (uffd_wp_resolve)
+ else if (uffd_prot_resolve)
newpmd = pmd_swp_clear_uffd(newpmd);
if (!pmd_same(*pmd, newpmd))
set_pmd_at(mm, addr, pmd, newpmd);
@@ -2690,8 +2690,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
spinlock_t *ptl;
pmd_t oldpmd, entry;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
- bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
- bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ bool uffd_prot = cp_flags & (MM_CP_UFFD_WP | MM_CP_UFFD_RWP);
+ bool uffd_prot_resolve = cp_flags &
+ (MM_CP_UFFD_WP_RESOLVE | MM_CP_UFFD_RWP_RESOLVE);
int ret = 1;
tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
@@ -2704,11 +2705,17 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
return 0;
if (thp_migration_supported() && pmd_is_valid_softleaf(*pmd)) {
- change_non_present_huge_pmd(mm, addr, pmd, uffd_wp,
- uffd_wp_resolve);
+ change_non_present_huge_pmd(mm, addr, pmd, uffd_prot,
+ uffd_prot_resolve);
goto unlock;
}
+ /* Already in the desired state */
+ if (prot_numa && pmd_protnone(*pmd))
+ goto unlock;
+ if ((cp_flags & MM_CP_UFFD_RWP) && pmd_protnone(*pmd) && pmd_uffd(*pmd))
+ goto unlock;
+
if (prot_numa) {
/*
@@ -2719,9 +2726,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (is_huge_zero_pmd(*pmd))
goto unlock;
- if (pmd_protnone(*pmd))
- goto unlock;
-
if (!folio_can_map_prot_numa(pmd_folio(*pmd), vma,
vma_is_single_threaded_private(vma)))
goto unlock;
@@ -2750,9 +2754,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
entry = pmd_modify(oldpmd, newprot);
- if (uffd_wp)
+ if (uffd_prot)
entry = pmd_mkuffd(entry);
- else if (uffd_wp_resolve)
+ else if (uffd_prot_resolve)
/*
* Leave the write bit to be handled by PF interrupt
* handler, then things like COW could be properly
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d0c81a056ae2..4d75b69d4272 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6395,6 +6395,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long last_addr_mask;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
+ bool uffd_rwp_resolve = cp_flags & MM_CP_UFFD_RWP_RESOLVE;
struct mmu_gather tlb;
/*
@@ -6420,6 +6422,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
ptep = hugetlb_walk(vma, address, psize);
if (!ptep) {
+ /*
+ * uffd_wp installs a pte marker on the unpopulated
+ * entry; uffd_rwp does not install markers so the
+ * allocation is unnecessary for it.
+ */
if (!uffd_wp) {
address |= last_addr_mask;
continue;
@@ -6441,7 +6448,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
* shouldn't happen at all. Warn about it if it
* happened due to some reason.
*/
- WARN_ON_ONCE(uffd_wp || uffd_wp_resolve);
+ WARN_ON_ONCE(uffd_wp || uffd_wp_resolve ||
+ uffd_rwp || uffd_rwp_resolve);
pages++;
spin_unlock(ptl);
address |= last_addr_mask;
@@ -6475,9 +6483,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pages++;
}
- if (uffd_wp)
+ if (uffd_wp || uffd_rwp)
newpte = pte_swp_mkuffd(newpte);
- else if (uffd_wp_resolve)
+ else if (uffd_wp_resolve || uffd_rwp_resolve)
newpte = pte_swp_clear_uffd(newpte);
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
@@ -6488,19 +6496,24 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
* pte_marker_uffd_wp()==true implies !poison
* because they're mutual exclusive.
*/
- if (pte_is_uffd_wp_marker(pte) && uffd_wp_resolve)
+ if (pte_is_uffd_wp_marker(pte) &&
+ (uffd_wp_resolve || uffd_rwp_resolve))
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else {
pte_t old_pte;
unsigned int shift = huge_page_shift(hstate_vma(vma));
+ /* Already protnone with uffd bit set? Nothing to do. */
+ if (uffd_rwp && pte_protnone(pte) && huge_pte_uffd(pte))
+ goto next;
+
old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
pte = huge_pte_modify(old_pte, newprot);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
- if (uffd_wp)
+ if (uffd_wp || uffd_rwp)
pte = huge_pte_mkuffd(pte);
- else if (uffd_wp_resolve)
+ else if (uffd_wp_resolve || uffd_rwp_resolve)
pte = huge_pte_clear_uffd(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8340c8b228c6..4a6b35482aee 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -214,8 +214,9 @@ static __always_inline void set_write_prot_commit_flush_ptes(struct vm_area_stru
static long change_softleaf_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte, pte_t oldpte, unsigned long cp_flags)
{
- const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
- const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ const bool uffd_prot = cp_flags & (MM_CP_UFFD_WP | MM_CP_UFFD_RWP);
+ const bool uffd_prot_resolve = cp_flags &
+ (MM_CP_UFFD_WP_RESOLVE | MM_CP_UFFD_RWP_RESOLVE);
softleaf_t entry = softleaf_from_pte(oldpte);
pte_t newpte;
@@ -256,7 +257,7 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
* to unprotect it, drop it; the next page
* fault will trigger without uffd trapping.
*/
- if (uffd_wp_resolve) {
+ if (uffd_prot_resolve) {
pte_clear(vma->vm_mm, addr, pte);
return 1;
}
@@ -265,9 +266,9 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
newpte = oldpte;
}
- if (uffd_wp)
+ if (uffd_prot)
newpte = pte_swp_mkuffd(newpte);
- else if (uffd_wp_resolve)
+ else if (uffd_prot_resolve)
newpte = pte_swp_clear_uffd(newpte);
if (!pte_same(oldpte, newpte)) {
@@ -282,16 +283,17 @@ static __always_inline void change_present_ptes(struct mmu_gather *tlb,
int nr_ptes, unsigned long end, pgprot_t newprot,
struct folio *folio, struct page *page, unsigned long cp_flags)
{
- const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
- const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+ const bool uffd_prot = cp_flags & (MM_CP_UFFD_WP | MM_CP_UFFD_RWP);
+ const bool uffd_prot_resolve = cp_flags &
+ (MM_CP_UFFD_WP_RESOLVE | MM_CP_UFFD_RWP_RESOLVE);
pte_t ptent, oldpte;
oldpte = modify_prot_start_ptes(vma, addr, ptep, nr_ptes);
ptent = pte_modify(oldpte, newprot);
- if (uffd_wp)
+ if (uffd_prot)
ptent = pte_mkuffd(ptent);
- else if (uffd_wp_resolve)
+ else if (uffd_prot_resolve)
ptent = pte_clear_uffd(ptent);
/*
@@ -325,6 +327,7 @@ static long change_pte_range(struct mmu_gather *tlb,
long pages = 0;
bool is_private_single_threaded;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
+ bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
int nr_ptes;
@@ -350,6 +353,14 @@ static long change_pte_range(struct mmu_gather *tlb,
/* Already in the desired state. */
if (prot_numa && pte_protnone(oldpte))
continue;
+ /*
+ * RWP-protected PTEs carry _PAGE_UFFD as a marker on
+ * top of PROT_NONE. Skip only entries already in that
+ * exact state; plain PROT_NONE from mprotect() still needs
+ * to be promoted so future faults can be distinguished.
+ */
+ if (uffd_rwp && pte_protnone(oldpte) && pte_uffd(oldpte))
+ continue;
page = vm_normal_page(vma, addr, oldpte);
if (page)
@@ -358,6 +369,8 @@ static long change_pte_range(struct mmu_gather *tlb,
/*
* Avoid trapping faults against the zero or KSM
* pages. See similar comment in change_huge_pmd.
+ * Skip this filter for uffd RWP which
+ * must set protnone regardless of NUMA placement.
*/
if (prot_numa &&
!folio_can_map_prot_numa(folio, vma,
@@ -667,7 +680,16 @@ long change_protection(struct mmu_gather *tlb,
pgprot_t newprot = vma->vm_page_prot;
long pages;
- BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
+ /*
+ * MM_CP_UFFD_{WP,RWP} and _RESOLVE are mutually exclusive within one
+ * change, and WP and RWP cannot mix. Miswired callers get a warn and
+ * a no-op; userspace cannot reach this state.
+ */
+ if (WARN_ON_ONCE((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL ||
+ (cp_flags & MM_CP_UFFD_RWP_ALL) == MM_CP_UFFD_RWP_ALL ||
+ ((cp_flags & MM_CP_UFFD_WP_ALL) &&
+ (cp_flags & MM_CP_UFFD_RWP_ALL))))
+ return 0;
#ifdef CONFIG_NUMA_BALANCING
/*
@@ -681,6 +703,10 @@ long change_protection(struct mmu_gather *tlb,
WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA);
#endif
+ if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_PROTNONE) &&
+ (cp_flags & MM_CP_UFFD_RWP))
+ newprot = PAGE_NONE;
+
if (is_vm_hugetlb_page(vma))
pages = hugetlb_change_protection(vma, start, end, newprot,
cp_flags);
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 06/14] mm: preserve RWP marker across PTE rewrites
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (4 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP Kiryl Shutsemau (Meta)
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
The uffd PTE bit must survive any kernel path that rewrites a PTE
on a VM_UFFD_RWP VMA, otherwise the marker that carries PAGE_NONE
semantics is silently dropped and the next access leaks past RWP
tracking. Wire the preservation through every path that rewrites a
VM_UFFD_RWP PTE.
Swap and device-exclusive: do_swap_page(), restore_exclusive_pte(),
and unuse_pte() (swapoff()) re-apply PAGE_NONE when the swap PTE
carries the uffd bit and the VMA has VM_UFFD_RWP.
Migration: remove_migration_pte() and remove_migration_pmd() do the
same after the migration entry is replaced with a real PTE/PMD.
Fork: __copy_present_ptes(), copy_present_page(), copy_nonpresent_pte(),
copy_huge_pmd(), copy_huge_non_present_pmd(), and
copy_hugetlb_page_range() keep the uffd bit on the child when the
destination VMA has VM_UFFD_RWP, matching the existing VM_UFFD_WP
handling. Add VM_UFFD_RWP to VM_COPY_ON_FORK so the flag itself
propagates.
mprotect(): change_present_ptes(), change_huge_pmd(), and
hugetlb_change_protection() restore PAGE_NONE after the base
protection has been recomputed (pte_modify()/pmd_modify()) from a
(possibly user-changed) vm_page_prot. pte_modify() preserves
_PAGE_UFFD, so the bit stays; we just have to force PAGE_NONE back
on top.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/mm.h | 3 ++-
mm/huge_memory.c | 47 +++++++++++++++++++++++++++++++++++++----
mm/hugetlb.c | 52 ++++++++++++++++++++++++++++++++++++++--------
mm/memory.c | 47 ++++++++++++++++++++++++++++++++++-------
mm/migrate.c | 8 +++++++
mm/mprotect.c | 10 +++++++++
mm/mremap.c | 13 ++++++++++--
mm/swapfile.c | 5 +++++
mm/userfaultfd.c | 17 +++++++++++++++
9 files changed, 179 insertions(+), 23 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecbf3e83a892..5953106758fa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -663,7 +663,8 @@ enum {
* only and thus cannot be reconstructed on page
* fault.
*/
-#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
+#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_UFFD_RWP | \
+ VM_MAYBE_GUARD)
/*
* mapping from the currently active vm_flags protection bits (the
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0d05abb0cd81..8620ba92263f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1943,7 +1943,7 @@ static void copy_huge_non_present_pmd(
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(dst_mm);
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
- if (!userfaultfd_wp(dst_vma))
+ if (!userfaultfd_protected(dst_vma))
pmd = pmd_swp_clear_uffd(pmd);
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
}
@@ -2038,9 +2038,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
out_zero_page:
mm_inc_nr_ptes(dst_mm);
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
- pmdp_set_wrprotect(src_mm, addr, src_pmd);
- if (!userfaultfd_wp(dst_vma))
+
+ /* See __copy_present_ptes(): restore accessible protection. */
+ if (!userfaultfd_protected(dst_vma)) {
+ if (userfaultfd_rwp(src_vma))
+ pmd = pmd_modify(pmd, dst_vma->vm_page_prot);
pmd = pmd_clear_uffd(pmd);
+ }
+
+ pmdp_set_wrprotect(src_mm, addr, src_pmd);
pmd = pmd_wrprotect(pmd);
set_pmd:
pmd = pmd_mkold(pmd);
@@ -2626,8 +2632,16 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
- if (vma_has_uffd_without_event_remap(vma))
+ if (vma_has_uffd_without_event_remap(vma)) {
+ /*
+ * See __copy_present_ptes(): normalise RWP PMDs so
+ * the destination starts accessible instead of taking
+ * a numa-hinting fault on first access.
+ */
+ if (pmd_present(pmd) && userfaultfd_rwp(vma))
+ pmd = pmd_modify(pmd, vma->vm_page_prot);
pmd = clear_uffd_wp_pmd(pmd);
+ }
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
@@ -2764,6 +2778,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
entry = pmd_clear_uffd(entry);
+ /* See change_pte_range(): preserve RWP protection across mprotect() */
+ if (userfaultfd_rwp(vma) && pmd_uffd(entry))
+ entry = pmd_modify(entry, PAGE_NONE);
+
/* See change_pte_range(). */
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
can_change_pmd_writable(vma, addr, entry))
@@ -2931,6 +2949,13 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
_dst_pmd = move_soft_dirty_pmd(src_pmdval);
_dst_pmd = clear_uffd_wp_pmd(_dst_pmd);
}
+
+ /* Re-arm RWP on the moved PMD if dst_vma is RWP-registered. */
+ if (userfaultfd_rwp(dst_vma)) {
+ _dst_pmd = pmd_modify(_dst_pmd, PAGE_NONE);
+ _dst_pmd = pmd_mkuffd(_dst_pmd);
+ }
+
set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd);
src_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);
@@ -3107,6 +3132,11 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
entry = pte_mkspecial(entry);
if (pmd_uffd(old_pmd))
entry = pte_mkuffd(entry);
+
+ /* Restore PAGE_NONE so an RWP marker keeps trapping */
+ if (userfaultfd_rwp(vma) && pmd_uffd(old_pmd))
+ entry = pte_modify(entry, PAGE_NONE);
+
VM_BUG_ON(!pte_none(ptep_get(pte)));
set_pte_at(mm, addr, pte, entry);
pte++;
@@ -3381,6 +3411,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (uffd_wp)
entry = pte_mkuffd(entry);
+ /* Restore PAGE_NONE so an RWP marker keeps trapping */
+ if (userfaultfd_rwp(vma) && uffd_wp)
+ entry = pte_modify(entry, PAGE_NONE);
+
for (i = 0; i < HPAGE_PMD_NR; i++)
VM_WARN_ON(!pte_none(ptep_get(pte + i)));
@@ -5053,6 +5087,11 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
pmde = pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd(*pvmw->pmd))
pmde = pmd_mkuffd(pmde);
+
+ /* See do_swap_page(): restore PAGE_NONE for RWP */
+ if (pmd_swp_uffd(*pvmw->pmd) && userfaultfd_rwp(vma))
+ pmde = pmd_modify(pmde, PAGE_NONE);
+
if (!softleaf_is_migration_young(entry))
pmde = pmd_mkold(pmde);
/* NOTE: this may contain setting soft-dirty on some archs */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4d75b69d4272..8555810cd42e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4843,8 +4843,16 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
__folio_mark_uptodate(new_folio);
hugetlb_add_new_anon_rmap(new_folio, vma, addr);
- if (userfaultfd_wp(vma) && huge_pte_uffd(old))
+ if (userfaultfd_protected(vma) && huge_pte_uffd(old)) {
newpte = huge_pte_mkuffd(newpte);
+ /* Restore PAGE_NONE so the RWP marker keeps trapping. */
+ if (userfaultfd_rwp(vma)) {
+ unsigned int shift = huge_page_shift(hstate_vma(vma));
+
+ newpte = huge_pte_modify(newpte, PAGE_NONE);
+ newpte = arch_make_huge_pte(newpte, shift, vma->vm_flags);
+ }
+ }
set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
@@ -4917,7 +4925,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
softleaf = softleaf_from_pte(entry);
if (unlikely(softleaf_is_hwpoison(softleaf))) {
- if (!userfaultfd_wp(dst_vma))
+ if (!userfaultfd_protected(dst_vma))
entry = huge_pte_clear_uffd(entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(softleaf_is_migration(softleaf))) {
@@ -4931,11 +4939,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
softleaf = make_readable_migration_entry(
swp_offset(softleaf));
entry = swp_entry_to_pte(softleaf);
- if (userfaultfd_wp(src_vma) && uffd)
+ if (userfaultfd_protected(src_vma) && uffd)
entry = pte_swp_mkuffd(entry);
set_huge_pte_at(src, addr, src_pte, entry, sz);
}
- if (!userfaultfd_wp(dst_vma))
+ if (!userfaultfd_protected(dst_vma))
entry = huge_pte_clear_uffd(entry);
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(pte_is_marker(entry))) {
@@ -5000,6 +5008,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
goto next;
}
+ /* See __copy_present_ptes(): restore accessible protection. */
+ if (!userfaultfd_protected(dst_vma)) {
+ if (userfaultfd_rwp(src_vma)) {
+ entry = huge_pte_modify(entry, dst_vma->vm_page_prot);
+ entry = arch_make_huge_pte(entry, huge_page_shift(h),
+ dst_vma->vm_flags);
+ }
+ entry = huge_pte_clear_uffd(entry);
+ }
+
if (cow) {
/*
* No need to notify as we are downgrading page
@@ -5012,9 +5030,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
entry = huge_pte_wrprotect(entry);
}
- if (!userfaultfd_wp(dst_vma))
- entry = huge_pte_clear_uffd(entry);
-
set_huge_pte_at(dst, addr, dst_pte, entry, sz);
hugetlb_count_add(npages, dst);
}
@@ -5060,10 +5075,22 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
huge_pte_clear(mm, new_addr, dst_pte, sz);
} else {
if (need_clear_uffd_wp) {
- if (pte_present(pte))
+ if (pte_present(pte)) {
+ /*
+ * See __copy_present_ptes(): normalise RWP
+ * PTEs so the destination starts accessible
+ * instead of taking a numa-hinting fault on
+ * first access.
+ */
+ if (userfaultfd_rwp(vma)) {
+ pte = huge_pte_modify(pte, vma->vm_page_prot);
+ pte = arch_make_huge_pte(pte, huge_page_shift(h),
+ vma->vm_flags);
+ }
pte = huge_pte_clear_uffd(pte);
- else
+ } else {
pte = pte_swp_clear_uffd(pte);
+ }
}
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
}
@@ -6515,6 +6542,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte = huge_pte_mkuffd(pte);
else if (uffd_wp_resolve || uffd_rwp_resolve)
pte = huge_pte_clear_uffd(pte);
+
+ /* Preserve RWP protection across mprotect() */
+ if (userfaultfd_rwp(vma) && huge_pte_uffd(pte)) {
+ pte = huge_pte_modify(pte, PAGE_NONE);
+ pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
+ }
+
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
tlb_remove_huge_tlb_entry(h, &tlb, ptep, address);
diff --git a/mm/memory.c b/mm/memory.c
index c4fd5cb4a08f..e4ae5350db41 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -896,6 +896,10 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
if (pte_swp_uffd(orig_pte))
pte = pte_mkuffd(pte);
+ /* See do_swap_page(): restore PAGE_NONE for RWP */
+ if (pte_swp_uffd(orig_pte) && userfaultfd_rwp(vma))
+ pte = pte_modify(pte, PAGE_NONE);
+
if ((vma->vm_flags & VM_WRITE) &&
can_change_pte_writable(vma, address, pte)) {
if (folio_test_dirty(folio))
@@ -1041,7 +1045,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
make_pte_marker(marker));
return 0;
}
- if (!userfaultfd_wp(dst_vma))
+ if (!userfaultfd_protected(dst_vma))
pte = pte_swp_clear_uffd(pte);
set_pte_at(dst_mm, addr, dst_pte, pte);
return 0;
@@ -1088,9 +1092,13 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
/* All done, just insert the new page copy in the child */
pte = folio_mk_pte(new_folio, dst_vma->vm_page_prot);
pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma);
- if (userfaultfd_pte_wp(dst_vma, ptep_get(src_pte)))
- /* Uffd-wp needs to be delivered to dest pte as well */
+ if (userfaultfd_protected(dst_vma) && pte_uffd(ptep_get(src_pte))) {
+ /* The uffd bit needs to be delivered to the dest pte as well */
pte = pte_mkuffd(pte);
+ /* Restore PAGE_NONE so the RWP marker keeps trapping */
+ if (userfaultfd_rwp(dst_vma))
+ pte = pte_modify(pte, PAGE_NONE);
+ }
set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
return 0;
}
@@ -1100,9 +1108,29 @@ static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma,
pte_t pte, unsigned long addr, int nr)
{
struct mm_struct *src_mm = src_vma->vm_mm;
+ bool writable;
+
+ /*
+ * Snapshot writability before the RWP-disarm rewrite below: when the
+ * child is not RWP-armed, pte_modify(pte, dst_vma->vm_page_prot) can
+ * silently drop _PAGE_RW from a resolved (no-marker) writable PTE,
+ * so a later pte_write(pte) check would skip the COW wrprotect and
+ * leave the parent writable over a folio shared with the child.
+ */
+ writable = pte_write(pte);
+
+ /*
+ * Child is not RWP-armed: restore accessible protection so the
+ * inherited PAGE_NONE does not cost a fault on first read.
+ */
+ if (!userfaultfd_protected(dst_vma)) {
+ if (userfaultfd_rwp(src_vma))
+ pte = pte_modify(pte, dst_vma->vm_page_prot);
+ pte = pte_clear_uffd(pte);
+ }
/* If it's a COW mapping, write protect it both processes. */
- if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) {
+ if (is_cow_mapping(src_vma->vm_flags) && writable) {
wrprotect_ptes(src_mm, addr, src_pte, nr);
pte = pte_wrprotect(pte);
}
@@ -1112,9 +1140,6 @@ static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma,
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
- if (!userfaultfd_wp(dst_vma))
- pte = pte_clear_uffd(pte);
-
set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr);
}
@@ -5041,6 +5066,14 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (pte_swp_uffd(vmf->orig_pte))
pte = pte_mkuffd(pte);
+ /*
+ * A page reclaimed while RWP-protected carries the uffd bit on
+ * its swap entry. Re-apply PAGE_NONE on swap-in so the first access
+ * still traps as an RWP fault. pte_modify() preserves _PAGE_UFFD.
+ */
+ if (pte_swp_uffd(vmf->orig_pte) && userfaultfd_rwp(vma))
+ pte = pte_modify(pte, PAGE_NONE);
+
/*
* Same logic as in do_wp_page(); however, optimize for pages that are
* certainly not shared either because we just allocated them without
diff --git a/mm/migrate.c b/mm/migrate.c
index 4bdb5be7afbf..8d7fd0b056b6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -329,6 +329,10 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
if (pte_swp_uffd(old_pte))
newpte = pte_mkuffd(newpte);
+ /* See remove_migration_pte(): restore PAGE_NONE for RWP */
+ if (pte_swp_uffd(old_pte) && userfaultfd_rwp(pvmw->vma))
+ newpte = pte_modify(newpte, PAGE_NONE);
+
set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
@@ -394,6 +398,10 @@ static bool remove_migration_pte(struct folio *folio,
else if (pte_swp_uffd(old_pte))
pte = pte_mkuffd(pte);
+ /* See do_swap_page(): restore PAGE_NONE for RWP */
+ if (pte_swp_uffd(old_pte) && userfaultfd_rwp(vma))
+ pte = pte_modify(pte, PAGE_NONE);
+
if (folio_test_anon(folio) && !softleaf_is_migration_read(entry))
rmap_flags |= RMAP_EXCLUSIVE;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4a6b35482aee..e0b5fe7c66b2 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -296,6 +296,16 @@ static __always_inline void change_present_ptes(struct mmu_gather *tlb,
else if (uffd_prot_resolve)
ptent = pte_clear_uffd(ptent);
+ /*
+ * The uffd bit on a VM_UFFD_RWP VMA carries PROT_NONE
+ * semantics. If mprotect() or NUMA hinting changed the
+ * base protection, restore PAGE_NONE so the PTE still
+ * traps on any access. pte_modify() preserves
+ * _PAGE_UFFD.
+ */
+ if (userfaultfd_rwp(vma) && pte_uffd(ptent))
+ ptent = pte_modify(ptent, PAGE_NONE);
+
/*
* In some writable, shared mappings, we might want
* to catch actual write access -- see
diff --git a/mm/mremap.c b/mm/mremap.c
index 12732a5c547e..14e5df316f83 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -296,10 +296,19 @@ static int move_ptes(struct pagetable_move_control *pmc,
pte_clear(mm, new_addr, new_ptep);
else {
if (need_clear_uffd_wp) {
- if (pte_present(pte))
+ if (pte_present(pte)) {
+ /*
+ * See __copy_present_ptes(): normalise
+ * RWP PTEs so the destination starts
+ * accessible instead of taking a
+ * numa-hinting fault on first access.
+ */
+ if (userfaultfd_rwp(vma))
+ pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_clear_uffd(pte);
- else
+ } else {
pte = pte_swp_clear_uffd(pte);
+ }
}
set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 15fdca2da1f7..27cc299ead9b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2559,6 +2559,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
new_pte = pte_mksoft_dirty(new_pte);
if (pte_swp_uffd(old_pte))
new_pte = pte_mkuffd(new_pte);
+
+ /* See do_swap_page(): restore PAGE_NONE for RWP */
+ if (pte_swp_uffd(old_pte) && userfaultfd_rwp(vma))
+ new_pte = pte_modify(new_pte, PAGE_NONE);
+
setpte:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
folio_put_swap(swapcache, folio_file_page(swapcache, swp_offset(entry)));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index ebce642c8805..9799abff1e76 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1285,6 +1285,13 @@ static long move_present_ptes(struct mm_struct *mm,
if (pte_dirty(orig_src_pte))
orig_dst_pte = pte_mkdirty(orig_dst_pte);
orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
+
+ /* Re-arm RWP on the moved PTE if dst_vma is RWP-registered. */
+ if (userfaultfd_rwp(dst_vma)) {
+ orig_dst_pte = pte_modify(orig_dst_pte, PAGE_NONE);
+ orig_dst_pte = pte_mkuffd(orig_dst_pte);
+ }
+
set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
src_addr += PAGE_SIZE;
@@ -1366,6 +1373,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
if (pgtable_supports_soft_dirty())
orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+ /* Re-arm RWP on the moved swap entry if dst_vma is RWP-registered. */
+ if (userfaultfd_rwp(dst_vma))
+ orig_src_pte = pte_swp_mkuffd(orig_src_pte);
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);
@@ -1392,6 +1402,13 @@ static int move_zeropage_pte(struct mm_struct *mm,
zero_pte = pte_mkspecial(pfn_pte(zero_pfn(dst_addr),
dst_vma->vm_page_prot));
+
+ /* Re-arm RWP on the moved PTE if dst_vma is RWP-registered. */
+ if (userfaultfd_rwp(dst_vma)) {
+ zero_pte = pte_modify(zero_pte, PAGE_NONE);
+ zero_pte = pte_mkuffd(zero_pte);
+ }
+
ptep_clear_flush(src_vma, src_addr, src_pte);
set_pte_at(mm, dst_addr, dst_pte, zero_pte);
double_pt_unlock(dst_ptl, src_ptl);
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (5 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 06/14] mm: preserve RWP marker across PTE rewrites Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing Kiryl Shutsemau (Meta)
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Three mm paths outside the fault handler gate on the uffd PTE bit
today: khugepaged (skip collapse on ranges carrying markers), rmap
(cap unmap batching), and GUP (force a fault through
gup_can_follow_protnone). Extend each to treat VM_UFFD_RWP the same
as VM_UFFD_WP; otherwise per-PTE RWP state is silently destroyed or
bypassed.
khugepaged: try_collapse_pte_mapped_thp() and
file_backed_vma_is_retractable() already refuse to collapse or
retract page tables on ranges carrying the uffd PTE bit. Broaden the
VMA predicate from userfaultfd_wp() to userfaultfd_protected() so
VM_UFFD_RWP ranges get the same protection. hpage_collapse_scan_pmd()
needs no change — its existing pte_uffd() check already catches an
RWP PTE because it carries the uffd bit.
rmap: folio_unmap_pte_batch() caps batching at 1 for VM_UFFD_RWP so
the restore path handles each PTE with its own marker.
GUP: gup_can_follow_protnone() forces a fault on VM_UFFD_RWP VMAs
regardless of FOLL_HONOR_NUMA_FAULT. RWP uses protnone as an
access-tracking marker, not for NUMA hinting, so any GUP — read or
write — must go through the userfaultfd fault path.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/mm.h | 10 +++++++++-
mm/khugepaged.c | 18 +++++++++++-------
mm/rmap.c | 2 +-
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5953106758fa..f72bf5ccf72c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4600,11 +4600,19 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
/*
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
- * a (NUMA hinting) fault is required.
+ * a (NUMA hinting or userfaultfd RWP) fault is required.
*/
static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma,
unsigned int flags)
{
+ /*
+ * VM_UFFD_RWP uses protnone as an access-tracking marker, not for
+ * NUMA hinting. GUP must always take a fault so the access is
+ * delivered to userfaultfd, regardless of FOLL_HONOR_NUMA_FAULT.
+ */
+ if (vma->vm_flags & VM_UFFD_RWP)
+ return false;
+
/*
* If callers don't want to honor NUMA hinting faults, no need to
* determine if we would actually have to trigger a NUMA hinting fault.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index afa218be15de..4f3fedcd75cf 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1895,8 +1895,11 @@ static enum scan_result try_collapse_pte_mapped_thp(struct mm_struct *mm, unsign
if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_FORCED_COLLAPSE, PMD_ORDER))
return SCAN_VMA_CHECK;
- /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
- if (userfaultfd_wp(vma))
+ /*
+ * Keep pmd pgtable while the uffd bit is in use; see comment in
+ * retract_page_tables().
+ */
+ if (userfaultfd_protected(vma))
return SCAN_PTE_UFFD;
folio = filemap_lock_folio(vma->vm_file->f_mapping,
@@ -2109,13 +2112,14 @@ static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)
return false;
/*
- * When a vma is registered with uffd-wp, we cannot recycle
+ * When a vma is registered with uffd-wp or RWP, we cannot recycle
* the page table because there may be pte markers installed.
- * Other vmas can still have the same file mapped hugely, but
- * skip this one: it will always be mapped in small page size
- * for uffd-wp registered ranges.
+ * VM_UFFD_RWP ranges similarly rely on per-PTE uffd state
+ * and cannot be recycled to a shared PMD. Other vmas can still
+ * have the same file mapped hugely, but skip this one: it will
+ * always be mapped in small page size for these registrations.
*/
- if (userfaultfd_wp(vma))
+ if (userfaultfd_protected(vma))
return false;
/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 546bc1cf9391..9fb733489898 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1965,7 +1965,7 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
if (pte_unused(pte))
return 1;
- if (userfaultfd_wp(vma))
+ if (userfaultfd_protected(vma))
return 1;
/*
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (6 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP Kiryl Shutsemau (Meta)
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Add the userspace interface for read-write protection tracking:
- UFFDIO_REGISTER_MODE_RWP: register a range for RWP tracking
- UFFD_FEATURE_RWP: capability bit
- UFFDIO_RWPROTECT: install / remove RWP on a range
Introduce CONFIG_USERFAULTFD_RWP, auto-selected on 64-bit kernels with
ARCH_HAS_PTE_PROTNONE and HAVE_ARCH_USERFAULTFD_WP. The symbol gates
VM_UFFD_RWP (previously aliased to VM_NONE) and the smaps/trace-flag
hooks added in the preparatory patches; without it the UAPI bits added
here have nothing to drive and would be unreachable.
Registration sets VM_UFFD_RWP on the VMA. Combining MODE_WP with
MODE_RWP is rejected because both modes claim the uffd PTE bit.
UFFDIO_RWPROTECT is the bidirectional counterpart of
UFFDIO_WRITEPROTECT:
- MODE_RWP: change_protection() with MM_CP_UFFD_RWP installs
  PAGE_NONE and sets the uffd bit on present PTEs
- !MODE_RWP: change_protection() with MM_CP_UFFD_RWP_RESOLVE
  restores vma->vm_page_prot and clears the bit
userfaultfd_clear_vma() runs the same resolve pass on unregister so
RWP state cannot outlive the uffd.
Re-registering a range must not drop a mode that installs per-PTE
markers (WP or RWP); doing so returns -EBUSY. This also closes a
pre-existing window where re-registering without MODE_WP would strand
uffd-wp markers: before, those caused extra write-faults but were
otherwise benign; with RWP preservation in place, a subsequent
mprotect() on a VM_UFFD_RWP VMA would silently promote the stale
markers to RWP.
The feature is not yet advertised. UFFDIO_REGISTER_MODE_RWP,
UFFD_FEATURE_RWP, and _UFFDIO_RWPROTECT are intentionally absent from
UFFD_API_REGISTER_MODES, UFFD_API_FEATURES, and UFFD_API_RANGE_IOCTLS,
so UFFDIO_API masks them out and the register-mode validator rejects
the bit. The follow-up patch adds fault dispatch and exposes the UAPI.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
Documentation/admin-guide/mm/userfaultfd.rst | 10 ++
include/linux/userfaultfd_k.h | 2 +
include/uapi/linux/userfaultfd.h | 19 ++
mm/Kconfig | 9 +
mm/userfaultfd.c | 180 ++++++++++++++++++-
5 files changed, 217 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst
index e5cc8848dcb3..1e533639fd50 100644
--- a/Documentation/admin-guide/mm/userfaultfd.rst
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -131,6 +131,16 @@ userfaults on the range registered. Not all ioctls will necessarily be
supported for all memory types (e.g. anonymous memory vs. shmem vs.
hugetlbfs), or all types of intercepted faults.
+.. note::
+
+ Re-registering an already-registered range must not drop any of the
+ modes that install per-PTE markers — currently
+ ``UFFDIO_REGISTER_MODE_WP`` and ``UFFDIO_REGISTER_MODE_RWP``. Doing
+ so would strand markers with no flag to describe them, so the call
+ is rejected with ``-EBUSY``; userspace must issue
+ ``UFFDIO_UNREGISTER`` first. This differs from older kernels, which
+ silently replaced the mode bits on re-registration.
+
Userland can use the ``uffdio_register.ioctls`` to manage the virtual
address space in the background (to add or potentially also remove
memory from the ``userfaultfd`` registered range). This means a userfault
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 16fbe11c0c55..f78d5d370d0a 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -150,6 +150,8 @@ static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_at
extern long uffd_wp_range(struct vm_area_struct *vma,
unsigned long start, unsigned long len, bool enable_wp);
+extern int mrwprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
+ unsigned long len, bool enable_rwp);
/* move_pages */
void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 2841e4ea8f2c..7b78aa3b5318 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -79,6 +79,7 @@
#define _UFFDIO_WRITEPROTECT (0x06)
#define _UFFDIO_CONTINUE (0x07)
#define _UFFDIO_POISON (0x08)
+#define _UFFDIO_RWPROTECT (0x09)
#define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */
@@ -103,6 +104,8 @@
struct uffdio_continue)
#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
struct uffdio_poison)
+#define UFFDIO_RWPROTECT _IOWR(UFFDIO, _UFFDIO_RWPROTECT, \
+ struct uffdio_rwprotect)
/* read() structure */
struct uffd_msg {
@@ -158,6 +161,7 @@ struct uffd_msg {
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
+#define UFFD_PAGEFAULT_FLAG_RWP (1<<3) /* If reason is VM_UFFD_RWP */
struct uffdio_api {
/* userland asks for an API number and the features to enable */
@@ -230,6 +234,11 @@ struct uffdio_api {
*
* UFFD_FEATURE_MOVE indicates that the kernel supports moving an
* existing page contents from userspace.
+ *
+ * UFFD_FEATURE_RWP indicates that the kernel supports
+ * UFFDIO_REGISTER_MODE_RWP for read-write protection tracking.
+ * Pages are made inaccessible via UFFDIO_RWPROTECT and faults
+ * are delivered when the pages are re-accessed.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -248,6 +257,7 @@ struct uffdio_api {
#define UFFD_FEATURE_POISON (1<<14)
#define UFFD_FEATURE_WP_ASYNC (1<<15)
#define UFFD_FEATURE_MOVE (1<<16)
+#define UFFD_FEATURE_RWP (1<<17)
__u64 features;
__u64 ioctls;
@@ -263,6 +273,7 @@ struct uffdio_register {
#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
+#define UFFDIO_REGISTER_MODE_RWP ((__u64)1<<3)
__u64 mode;
/*
@@ -356,6 +367,14 @@ struct uffdio_poison {
__s64 updated;
};
+struct uffdio_rwprotect {
+ struct uffdio_range range;
+ /* !RWP means undo RWP-protection */
+#define UFFDIO_RWPROTECT_MODE_RWP ((__u64)1<<0)
+#define UFFDIO_RWPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
+
struct uffdio_move {
__u64 dst;
__u64 src;
diff --git a/mm/Kconfig b/mm/Kconfig
index 776b67c66e82..fac01bcfc0d1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1333,6 +1333,15 @@ config HAVE_ARCH_USERFAULTFD_MINOR
help
Arch has userfaultfd minor fault support
+config USERFAULTFD_RWP
+ def_bool y
+ depends on 64BIT && ARCH_HAS_PTE_PROTNONE && HAVE_ARCH_USERFAULTFD_WP
+ help
+ Userfaultfd read-write protection (UFFDIO_RWPROTECT) delivers a
+ userfaultfd notification on every access -- read or write -- to a
+ protected range, letting userspace observe the working set of a
+ process.
+
menuconfig USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9799abff1e76..78eb63702649 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1157,6 +1157,75 @@ static int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
return err;
}
+int mrwprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
+ unsigned long len, bool enable_rwp)
+{
+ struct mm_struct *dst_mm = ctx->mm;
+ unsigned long end = start + len;
+ struct vm_area_struct *dst_vma;
+ unsigned int mm_cp_flags;
+ struct mmu_gather tlb;
+ bool found = false;
+ VMA_ITERATOR(vmi, dst_mm, start);
+
+ VM_WARN_ON_ONCE(start & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(len & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(start + len <= start);
+
+ guard(mmap_read_lock)(dst_mm);
+ guard(rwsem_read)(&ctx->map_changing_lock);
+
+ if (atomic_read(&ctx->mmap_changing))
+ return -EAGAIN;
+
+ if (enable_rwp)
+ mm_cp_flags = MM_CP_UFFD_RWP;
+ else
+ mm_cp_flags = MM_CP_UFFD_RWP_RESOLVE;
+
+ /*
+ * Pre-scan the range: validate every spanned VMA before applying
+ * any change_protection() so a partial failure cannot leave the
+ * process with only a prefix of the range re-protected.
+ */
+ for_each_vma_range(vmi, dst_vma, end) {
+ if (!userfaultfd_rwp(dst_vma))
+ return -ENOENT;
+
+ if (is_vm_hugetlb_page(dst_vma)) {
+ unsigned long page_mask;
+
+ page_mask = vma_kernel_pagesize(dst_vma) - 1;
+ if ((start & page_mask) || (len & page_mask))
+ return -EINVAL;
+ }
+ found = true;
+ }
+ if (!found)
+ return -ENOENT;
+
+ vma_iter_set(&vmi, start);
+ tlb_gather_mmu(&tlb, dst_mm);
+ for_each_vma_range(vmi, dst_vma, end) {
+ unsigned long vma_start = max(dst_vma->vm_start, start);
+ unsigned long vma_end = min(dst_vma->vm_end, end);
+ unsigned int flags = mm_cp_flags;
+
+ /*
+ * On resolve, try to upgrade writability per-VMA --
+ * MM_CP_TRY_CHANGE_WRITABLE WARNs in
+ * maybe_change_pte_writable() if the VMA is not VM_WRITE,
+ * and RWP can be registered on PROT_READ-only mappings.
+ */
+ if (!enable_rwp && vma_wants_manual_pte_write_upgrade(dst_vma))
+ flags |= MM_CP_TRY_CHANGE_WRITABLE;
+
+ change_protection(&tlb, dst_vma, vma_start, vma_end, flags);
+ }
+ tlb_finish_mmu(&tlb);
+
+ return 0;
+}
void double_pt_lock(spinlock_t *ptl1,
spinlock_t *ptl2)
@@ -2197,9 +2266,22 @@ static struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
if (start == vma->vm_start && end == vma->vm_end)
give_up_on_oom = true;
- /* Reset ptes for the whole vma range if wr-protected */
- if (userfaultfd_wp(vma))
- uffd_wp_range(vma, start, end - start, false);
+ /* Clear the uffd bit and/or restore protnone PTEs */
+ if (userfaultfd_protected(vma)) {
+ unsigned int mm_cp_flags = 0;
+ struct mmu_gather tlb;
+
+ if (userfaultfd_wp(vma))
+ mm_cp_flags |= MM_CP_UFFD_WP_RESOLVE;
+ if (userfaultfd_rwp(vma))
+ mm_cp_flags |= MM_CP_UFFD_RWP_RESOLVE;
+ if (vma_wants_manual_pte_write_upgrade(vma))
+ mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
+
+ tlb_gather_mmu(&tlb, vma->vm_mm);
+ change_protection(&tlb, vma, start, end, mm_cp_flags);
+ tlb_finish_mmu(&tlb);
+ }
ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
&new_vma_flags, NULL_VM_UFFD_CTX,
@@ -2248,6 +2330,14 @@ static int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
vma_test_all_mask(vma, vma_flags))
goto skip;
+ /*
+ * Pre-scan in userfaultfd_register() already rejected mode
+ * switches that would drop VM_UFFD_WP or VM_UFFD_RWP, so a
+ * stray bit here is a bug.
+ */
+ VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx == ctx &&
+ vma->vm_flags & (VM_UFFD_WP | VM_UFFD_RWP) & ~vm_flags);
+
if (vma->vm_start > start)
start = vma->vm_start;
vma_end = min(end, vma->vm_end);
@@ -2514,6 +2604,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE;
if (reason & VM_UFFD_WP)
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
+ if (reason & VM_UFFD_RWP)
+ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_RWP;
if (reason & VM_UFFD_MINOR)
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_MINOR;
if (features & UFFD_FEATURE_THREAD_ID)
@@ -3593,6 +3685,22 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vm_flags |= VM_UFFD_WP;
}
+ if (uffdio_register.mode & UFFDIO_REGISTER_MODE_RWP) {
+ if (!pgtable_supports_uffd() || VM_UFFD_RWP == VM_NONE)
+ goto out;
+ if (!(ctx->features & UFFD_FEATURE_RWP))
+ goto out;
+ vm_flags |= VM_UFFD_RWP;
+ }
+
+ /*
+ * WP and RWP share the uffd PTE bit and
+ * cannot coexist in the same VMA — the bit would carry ambiguous
+ * semantics. Reject the combination up front.
+ */
+ if ((vm_flags & VM_UFFD_WP) && (vm_flags & VM_UFFD_RWP))
+ goto out;
+
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) {
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
goto out;
@@ -3686,6 +3794,16 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
cur->vm_userfaultfd_ctx.ctx != ctx)
goto out_unlock;
+ /*
+ * Mode switches that drop VM_UFFD_WP or VM_UFFD_RWP would
+ * leave PTE markers without the flag that describes them;
+ * subsequent mprotect() would then promote stale markers
+ * into the other mode. Require an unregister first.
+ */
+ if (cur->vm_userfaultfd_ctx.ctx == ctx &&
+ cur->vm_flags & (VM_UFFD_WP | VM_UFFD_RWP) & ~vm_flags)
+ goto out_unlock;
+
/*
* Note vmas containing huge pages
*/
@@ -3719,6 +3837,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR))
ioctls_out &= ~((__u64)1 << _UFFDIO_CONTINUE);
+ /* RWPROTECT is only supported for RWP ranges */
+ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_RWP))
+ ioctls_out &= ~((__u64)1 << _UFFDIO_RWPROTECT);
+
/*
* Now that we scanned all vmas we can already tell
* userland which ioctls methods are guaranteed to
@@ -4066,6 +4188,55 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
return ret;
}
+static int userfaultfd_rwprotect(struct userfaultfd_ctx *ctx,
+ unsigned long arg)
+{
+ int ret;
+ struct uffdio_rwprotect uffdio_rwp;
+ struct userfaultfd_wake_range range;
+ bool mode_rwp, mode_dontwake;
+
+ if (atomic_read(&ctx->mmap_changing))
+ return -EAGAIN;
+
+ if (copy_from_user(&uffdio_rwp, (void __user *)arg,
+ sizeof(uffdio_rwp)))
+ return -EFAULT;
+
+ ret = validate_range(ctx->mm, uffdio_rwp.range.start,
+ uffdio_rwp.range.len);
+ if (ret)
+ return ret;
+
+ if (uffdio_rwp.mode & ~(UFFDIO_RWPROTECT_MODE_DONTWAKE |
+ UFFDIO_RWPROTECT_MODE_RWP))
+ return -EINVAL;
+
+ mode_rwp = uffdio_rwp.mode & UFFDIO_RWPROTECT_MODE_RWP;
+ mode_dontwake = uffdio_rwp.mode & UFFDIO_RWPROTECT_MODE_DONTWAKE;
+
+ if (mode_rwp && mode_dontwake)
+ return -EINVAL;
+
+ if (mmget_not_zero(ctx->mm)) {
+ ret = mrwprotect_range(ctx, uffdio_rwp.range.start,
+ uffdio_rwp.range.len, mode_rwp);
+ mmput(ctx->mm);
+ } else {
+ return -ESRCH;
+ }
+
+ if (ret)
+ return ret;
+
+ if (!mode_rwp && !mode_dontwake) {
+ range.start = uffdio_rwp.range.start;
+ range.len = uffdio_rwp.range.len;
+ wake_userfault(ctx, &range);
+ }
+ return ret;
+}
+
static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
{
__s64 ret;
@@ -4372,6 +4543,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
case UFFDIO_POISON:
ret = userfaultfd_poison(ctx, arg);
break;
+ case UFFDIO_RWPROTECT:
+ ret = userfaultfd_rwprotect(ctx, arg);
+ break;
}
return ret;
}
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (7 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution Kiryl Shutsemau (Meta)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Wire the fault side of read-write protection tracking and turn the
userspace interface on.
An RWP-protected PTE is PAGE_NONE with the uffd bit set. The
PROT_NONE protection triggers a fault on any access; the uffd bit
distinguishes the entry from plain mprotect(PROT_NONE) or NUMA hinting.
Fault dispatch, per level:

  PTE      handle_pte_fault()   -> do_uffd_rwp()
  PMD      __handle_mm_fault()  -> do_huge_pmd_uffd_rwp()
  hugetlb  hugetlb_fault()      -> hugetlb_handle_userfault()
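The PTE and PMD branches gate on userfaultfd_pte_rwp() /
userfaultfd_huge_pmd_rwp() (VM_UFFD_RWP plus the uffd bit) and fall
through to do_numa_page() / do_huge_pmd_numa_page() otherwise. The
hugetlb branch open-codes the same test with userfaultfd_rwp() and
huge_pte_uffd(); when it does not match, normal hugetlb fault handling
proceeds. Each path reports the fault with reason VM_UFFD_RWP (PTE and
PMD through handle_userfault(), hugetlb through
hugetlb_handle_userfault()), which userspace sees as a
UFFD_PAGEFAULT_FLAG_RWP message; the handler resolves it by calling
UFFDIO_RWPROTECT with UFFDIO_RWPROTECT_MODE_RWP clear.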
userfaultfd_must_wait() and userfaultfd_huge_must_wait() gain matching
protnone+uffd checks, so a sync-mode faulting thread keeps waiting
while the entry is still RWP-protected.
Expose the UAPI:

  UFFDIO_REGISTER_MODE_RWP  -> UFFD_API_REGISTER_MODES
  UFFD_FEATURE_RWP          -> UFFD_API_FEATURES
  _UFFDIO_RWPROTECT         -> UFFD_API_RANGE_IOCTLS and
                               UFFD_API_RANGE_IOCTLS_BASIC
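UFFD_FEATURE_RWP is masked out at UFFDIO_API time when VM_UFFD_RWP
aliases VM_NONE (RWP unavailable at compile time, e.g. 32-bit or no
PROT_NONE PTE support) or when pgtable_supports_uffd() reports that the
uffd PTE bit is not usable at runtime, so userspace never sees an
advertised-but-broken feature.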
Works on anonymous, shmem, and hugetlb memory.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/huge_mm.h | 7 +++++++
include/linux/userfaultfd_k.h | 24 ++++++++++++++++++++++++
include/uapi/linux/userfaultfd.h | 12 ++++++++----
mm/huge_memory.c | 5 +++++
mm/hugetlb.c | 11 +++++++++++
mm/memory.c | 21 +++++++++++++++++++--
mm/userfaultfd.c | 32 ++++++++++++++++++++++++++++++--
7 files changed, 104 insertions(+), 8 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index edece3e26985..fe48d76957fb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -529,6 +529,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_uffd_rwp(struct vm_fault *vmf);
+
vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);
extern struct folio *huge_zero_folio;
@@ -716,6 +718,11 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
return NULL;
}
+static inline vm_fault_t do_huge_pmd_uffd_rwp(struct vm_fault *vmf)
+{
+ return 0;
+}
+
static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
return 0;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f78d5d370d0a..d8f5f400c8ef 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -233,6 +233,18 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
return userfaultfd_wp(vma) && pmd_uffd(pmd);
}
+static inline bool userfaultfd_pte_rwp(struct vm_area_struct *vma,
+ pte_t pte)
+{
+ return userfaultfd_rwp(vma) && pte_uffd(pte);
+}
+
+static inline bool userfaultfd_huge_pmd_rwp(struct vm_area_struct *vma,
+ pmd_t pmd)
+{
+ return userfaultfd_rwp(vma) && pmd_uffd(pmd);
+}
+
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
return vma->vm_flags & __VM_UFFD_FLAGS;
@@ -363,6 +375,18 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
return false;
}
+static inline bool userfaultfd_pte_rwp(struct vm_area_struct *vma,
+ pte_t pte)
+{
+ return false;
+}
+
+static inline bool userfaultfd_huge_pmd_rwp(struct vm_area_struct *vma,
+ pmd_t pmd)
+{
+ return false;
+}
+
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
return false;
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 7b78aa3b5318..d803e76d47ad 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -25,7 +25,8 @@
#define UFFD_API ((__u64)0xAA)
#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
UFFDIO_REGISTER_MODE_WP | \
- UFFDIO_REGISTER_MODE_MINOR)
+ UFFDIO_REGISTER_MODE_MINOR | \
+ UFFDIO_REGISTER_MODE_RWP)
#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
UFFD_FEATURE_EVENT_FORK | \
UFFD_FEATURE_EVENT_REMAP | \
@@ -42,7 +43,8 @@
UFFD_FEATURE_WP_UNPOPULATED | \
UFFD_FEATURE_POISON | \
UFFD_FEATURE_WP_ASYNC | \
- UFFD_FEATURE_MOVE)
+ UFFD_FEATURE_MOVE | \
+ UFFD_FEATURE_RWP)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -54,13 +56,15 @@
(__u64)1 << _UFFDIO_MOVE | \
(__u64)1 << _UFFDIO_WRITEPROTECT | \
(__u64)1 << _UFFDIO_CONTINUE | \
- (__u64)1 << _UFFDIO_POISON)
+ (__u64)1 << _UFFDIO_POISON | \
+ (__u64)1 << _UFFDIO_RWPROTECT)
#define UFFD_API_RANGE_IOCTLS_BASIC \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_WRITEPROTECT | \
(__u64)1 << _UFFDIO_CONTINUE | \
- (__u64)1 << _UFFDIO_POISON)
+ (__u64)1 << _UFFDIO_POISON | \
+ (__u64)1 << _UFFDIO_RWPROTECT)
/*
* Valid ioctl command number range with this API is from 0x00 to
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8620ba92263f..cd32bd51e311 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2289,6 +2289,11 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
return pmd_dirty(pmd);
}
+vm_fault_t do_huge_pmd_uffd_rwp(struct vm_fault *vmf)
+{
+ return handle_userfault(vmf, VM_UFFD_RWP);
+}
+
/* NUMA hinting page fault entry point for trans huge pmds */
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8555810cd42e..f63718296cc2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6062,6 +6062,17 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_mutex;
}
+ /*
+ * Protnone hugetlb PTEs with the uffd bit are used by
+ * userfaultfd RWP for access tracking. Plain PROT_NONE (without the
+ * marker) is not an RWP fault and is not expected on hugetlb (no
+ * NUMA hinting), so let normal hugetlb fault handling proceed.
+ */
+ if (pte_protnone(vmf.orig_pte) && vma_is_accessible(vma) &&
+ userfaultfd_rwp(vma) && huge_pte_uffd(vmf.orig_pte)) {
+ return hugetlb_handle_userfault(&vmf, mapping, VM_UFFD_RWP);
+ }
+
/*
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
diff --git a/mm/memory.c b/mm/memory.c
index e4ae5350db41..3e393881031d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6135,6 +6135,12 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
}
}
+static vm_fault_t do_uffd_rwp(struct vm_fault *vmf)
+{
+ pte_unmap(vmf->pte);
+ return handle_userfault(vmf, VM_UFFD_RWP);
+}
+
static vm_fault_t do_numa_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -6410,8 +6416,16 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
if (!pte_present(vmf->orig_pte))
return do_swap_page(vmf);
- if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
+ if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) {
+ /*
+ * RWP-protected PTEs are protnone plus the uffd bit. On a
+ * VM_UFFD_RWP VMA, a protnone PTE without the uffd bit is
+ * NUMA hinting and must still fall through to do_numa_page().
+ */
+ if (userfaultfd_pte_rwp(vmf->vma, vmf->orig_pte))
+ return do_uffd_rwp(vmf);
return do_numa_page(vmf);
+ }
spin_lock(vmf->ptl);
entry = vmf->orig_pte;
@@ -6525,8 +6539,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return 0;
}
if (pmd_trans_huge(vmf.orig_pmd)) {
- if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
+ if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) {
+ if (userfaultfd_huge_pmd_rwp(vma, vmf.orig_pmd))
+ return do_huge_pmd_uffd_rwp(&vmf);
return do_huge_pmd_numa_page(&vmf);
+ }
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
!pmd_write(vmf.orig_pmd)) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 78eb63702649..b966df47800c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -2650,6 +2650,12 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
*/
if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
return true;
+ /*
+ * PTE is still RW-protected (protnone with uffd bit), wait for
+ * resolution. Plain PROT_NONE without the marker is not an RWP fault.
+ */
+ if (pte_protnone(pte) && huge_pte_uffd(pte) && (reason & VM_UFFD_RWP))
+ return true;
return false;
}
@@ -2710,8 +2716,14 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
if (!pmd_present(_pmd))
return false;
- if (pmd_trans_huge(_pmd))
- return !pmd_write(_pmd) && (reason & VM_UFFD_WP);
+ if (pmd_trans_huge(_pmd)) {
+ if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
+ return true;
+ if (pmd_protnone(_pmd) && pmd_uffd(_pmd) &&
+ (reason & VM_UFFD_RWP))
+ return true;
+ return false;
+ }
pte = pte_offset_map(pmd, address);
if (!pte)
@@ -2736,6 +2748,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
*/
if (!pte_write(ptent) && (reason & VM_UFFD_WP))
goto out;
+ /*
+ * PTE is still RW-protected (protnone with uffd bit), wait for
+ * userspace to resolve. Plain PROT_NONE without the marker is not
+ * an RWP fault.
+ */
+ if (pte_protnone(ptent) && pte_uffd(ptent) && (reason & VM_UFFD_RWP))
+ goto out;
ret = false;
out:
@@ -4477,6 +4496,15 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
}
+ /*
+ * RWP needs both PROT_NONE support and the uffd-wp PTE bit. The
+ * VM_UFFD_RWP check covers compile-time unavailability; the
+ * pgtable_supports_uffd() check covers runtime (e.g. riscv
+ * without the SVRSW60T59B extension) where the PTE bit is declared
+ * but not actually usable.
+ */
+ if (VM_UFFD_RWP == VM_NONE || !pgtable_supports_uffd())
+ uffdio_api.features &= ~UFFD_FEATURE_RWP;
ret = -EINVAL;
if (features & ~uffdio_api.features)
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (8 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
2026-05-25 11:37 ` [PATCH v4 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution Kiryl Shutsemau (Meta)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
PAGEMAP_SCAN already reports PAGE_IS_WRITTEN from the inverted uffd
PTE bit, targeting the UFFDIO_WRITEPROTECT workflow. UFFDIO_RWPROTECT
reuses the same PTE bit as a marker for read-write protection, but
"has been written" and "has been accessed" are distinct semantic
signals — they happen to share one PTE bit today only because the two
implementations share infrastructure.
Give RWP its own pagemap category so the UAPI does not conflate them:

  PAGE_IS_WRITTEN   reported on VM_UFFD_WP VMAs,  !pte_uffd(pte)
  PAGE_IS_ACCESSED  reported on VM_UFFD_RWP VMAs, !pte_uffd(pte)
Both still read the same PTE bit today, but each is scoped to the VMA
whose registered mode makes the bit meaningful. If a future
implementation moves RWP to a separate PTE bit, only PAGE_IS_ACCESSED
switches over.
This is a UAPI narrowing. Outside VM_UFFD_WP VMAs the uffd bit is
always clear, so PAGEMAP_SCAN used to flag PAGE_IS_WRITTEN on every
present PTE there — a meaningless duplicate of PAGE_IS_PRESENT. Now
PAGE_IS_WRITTEN fires only inside VM_UFFD_WP VMAs.
pagemap_hugetlb_category() now takes the vma like its PTE/PMD peers.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
Documentation/admin-guide/mm/pagemap.rst | 13 ++++-
fs/proc/task_mmu.c | 73 ++++++++++++++++++------
include/uapi/linux/fs.h | 1 +
tools/include/uapi/linux/fs.h | 1 +
4 files changed, 67 insertions(+), 21 deletions(-)
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index c57e61b5d8aa..ffa690a171c8 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -19,8 +19,11 @@ There are four components to pagemap:
* Bit 55 pte is soft-dirty (see
Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped (since 4.2)
- * Bit 57 pte is uffd-wp write-protected (since 5.13) (see
- Documentation/admin-guide/mm/userfaultfd.rst)
+ * Bit 57 pte is tracked by userfaultfd (since 5.13) — in a
+ ``VM_UFFD_WP`` VMA this indicates a write-protected PTE; in a
+ ``VM_UFFD_RWP`` VMA it indicates an RWP-protected PTE. WP and
+ RWP are mutually exclusive per VMA, so the meaning is
+ unambiguous. See Documentation/admin-guide/mm/userfaultfd.rst.
* Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page)
* Bits 59-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
@@ -244,7 +247,8 @@ in this IOCTL:
Following flags about pages are currently supported:
- ``PAGE_IS_WPALLOWED`` - Page has async-write-protection enabled
-- ``PAGE_IS_WRITTEN`` - Page has been written to from the time it was write protected
+- ``PAGE_IS_WRITTEN`` - Page in a ``UFFDIO_REGISTER_MODE_WP`` VMA has been
+ written to since it was write-protected. Only reported inside such VMAs.
- ``PAGE_IS_FILE`` - Page is file backed
- ``PAGE_IS_PRESENT`` - Page is present in the memory
- ``PAGE_IS_SWAPPED`` - Page is in swapped
@@ -252,6 +256,9 @@ Following flags about pages are currently supported:
- ``PAGE_IS_HUGE`` - Page is PMD-mapped THP or Hugetlb backed
- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
- ``PAGE_IS_GUARD`` - Page is a part of a guard region
+- ``PAGE_IS_ACCESSED`` - Page in a ``UFFDIO_REGISTER_MODE_RWP`` VMA has been
+ accessed since RWP was applied. Only reported inside such VMAs. See
+ Documentation/admin-guide/mm/userfaultfd.rst for the RWP workflow.
The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5e74dadfb1cb..97fb941871a3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2284,7 +2284,7 @@ static const struct mm_walk_ops pagemap_ops = {
* Bits 5-54 swap offset if swapped
* Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
- * Bit 57 pte is uffd-wp write-protected
+ * Bit 57 pte is tracked by userfaultfd (uffd-wp or RWP)
* Bit 58 pte is a guard region
* Bits 59-60 zero
* Bit 61 page is file-page or shared-anon
@@ -2419,7 +2419,7 @@ static int pagemap_release(struct inode *inode, struct file *file)
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY | \
- PAGE_IS_GUARD)
+ PAGE_IS_GUARD | PAGE_IS_ACCESSED)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
struct pagemap_scan_private {
@@ -2444,8 +2444,12 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories = PAGE_IS_PRESENT;
- if (!pte_uffd(pte))
- categories |= PAGE_IS_WRITTEN;
+ if (!pte_uffd(pte)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page(vma, addr, pte);
@@ -2462,8 +2466,12 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories = PAGE_IS_SWAPPED;
- if (!pte_swp_uffd_any(pte))
- categories |= PAGE_IS_WRITTEN;
+ if (!pte_swp_uffd_any(pte)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
entry = softleaf_from_pte(pte);
if (softleaf_is_guard_marker(entry))
@@ -2512,8 +2520,12 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
struct page *page;
categories |= PAGE_IS_PRESENT;
- if (!pmd_uffd(pmd))
- categories |= PAGE_IS_WRITTEN;
+ if (!pmd_uffd(pmd)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
if (p->masks_of_interest & PAGE_IS_FILE) {
page = vm_normal_page_pmd(vma, addr, pmd);
@@ -2527,8 +2539,12 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_SOFT_DIRTY;
} else {
categories |= PAGE_IS_SWAPPED;
- if (!pmd_swp_uffd(pmd))
- categories |= PAGE_IS_WRITTEN;
+ if (!pmd_swp_uffd(pmd)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
if (pmd_swp_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
@@ -2561,7 +2577,8 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_HUGETLB_PAGE
-static unsigned long pagemap_hugetlb_category(pte_t pte)
+static unsigned long pagemap_hugetlb_category(struct vm_area_struct *vma,
+ pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
@@ -2576,8 +2593,12 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
- if (!huge_pte_uffd(pte))
- categories |= PAGE_IS_WRITTEN;
+ if (!huge_pte_uffd(pte)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
if (!PageAnon(pte_page(pte)))
categories |= PAGE_IS_FILE;
if (is_zero_pfn(pte_pfn(pte)))
@@ -2587,8 +2608,12 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
} else {
categories |= PAGE_IS_SWAPPED;
- if (!pte_swp_uffd_any(pte))
- categories |= PAGE_IS_WRITTEN;
+ if (!pte_swp_uffd_any(pte)) {
+ if (userfaultfd_wp(vma))
+ categories |= PAGE_IS_WRITTEN;
+ if (userfaultfd_rwp(vma))
+ categories |= PAGE_IS_ACCESSED;
+ }
if (pte_swp_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
}
@@ -2673,6 +2698,16 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
bool wp_allowed = userfaultfd_wp_async(vma) &&
userfaultfd_wp_use_markers(vma);
+ /*
+ * PM_SCAN_WP_MATCHING is the atomic read-and-reset flavour of the
+ * scan and is implemented for the WP marker only. Reject it on
+ * VM_UFFD_RWP VMAs explicitly so userspace gets a clear error
+ * instead of a silently-skipped range; re-arming is done with
+ * UFFDIO_RWPROTECT(MODE_RWP).
+ */
+ if (userfaultfd_rwp(vma) && (p->arg.flags & PM_SCAN_WP_MATCHING))
+ return -EINVAL;
+
if (!wp_allowed) {
/* User requested explicit failure over wp-async capability */
if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
@@ -2860,7 +2895,8 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
goto flush_and_return;
}
- if (!p->arg.category_anyof_mask && !p->arg.category_inverted &&
+ if (userfaultfd_wp(vma) && !p->arg.category_anyof_mask &&
+ !p->arg.category_inverted &&
p->arg.category_mask == PAGE_IS_WRITTEN &&
p->arg.return_mask == PAGE_IS_WRITTEN) {
for (addr = start; addr < end; pte++, addr += PAGE_SIZE) {
@@ -2935,7 +2971,8 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
/* Go the short route when not write-protecting pages. */
pte = huge_ptep_get(walk->mm, start, ptep);
- categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
+ categories = p->cur_vma_category |
+ pagemap_hugetlb_category(vma, pte);
if (!pagemap_scan_is_interesting_page(categories, p))
return 0;
@@ -2947,7 +2984,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
pte = huge_ptep_get(walk->mm, start, ptep);
- categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
+ categories = p->cur_vma_category | pagemap_hugetlb_category(vma, pte);
if (!pagemap_scan_is_interesting_page(categories, p))
goto out_unlock;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 13f71202845e..c4aeaa0c31c7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -455,6 +455,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
#define PAGE_IS_GUARD (1 << 8)
+#define PAGE_IS_ACCESSED (1 << 9)
/*
* struct page_region - Page region with flags
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 24ddf7bc4f25..f0a26309b6d5 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -364,6 +364,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
#define PAGE_IS_GUARD (1 << 8)
+#define PAGE_IS_ACCESSED (1 << 9)
/*
* struct page_region - Page region with flags
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution
2026-05-25 11:37 [PATCH v4 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
` (9 preceding siblings ...)
2026-05-25 11:37 ` [PATCH v4 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking Kiryl Shutsemau (Meta)
@ 2026-05-25 11:37 ` Kiryl Shutsemau (Meta)
10 siblings, 0 replies; 12+ messages in thread
From: Kiryl Shutsemau (Meta) @ 2026-05-25 11:37 UTC (permalink / raw)
To: akpm, rppt, peterx, david
Cc: ljs, surenb, vbabka, Liam.Howlett, ziy, corbet, skhan, seanjc,
pbonzini, jthoughton, aarcange, sj, usama.arif, linux-mm,
linux-kernel, linux-doc, linux-kselftest, kvm, kernel-team,
Kiryl Shutsemau (Meta)
Sync RWP delivers a message and blocks the faulting thread until the
handler resolves the fault. For working-set tracking the VMM does not
need the message: it just needs to know, at scan time, which pages
were touched. Async RWP serves that use case — the kernel restores
access in-place and the faulting thread continues without blocking.
The VMM reconstructs the access pattern after the fact via
PAGEMAP_SCAN: pages whose uffd bit is still set (inverted
PAGE_IS_ACCESSED) were not re-accessed since the last RWP cycle.
Worth calling out: async resolution upgrades writable private anon
PTEs via pte_mkwrite() when can_change_pte_writable() allows, mirroring
do_numa_page(). Without it, every write to a just-resolved RWP'd
writable page would take a second, write-protect fault.
UFFD_FEATURE_RWP_ASYNC requires UFFD_FEATURE_RWP.
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/userfaultfd_k.h | 6 ++++++
include/uapi/linux/userfaultfd.h | 11 ++++++++++-
mm/huge_memory.c | 25 ++++++++++++++++++++++++-
mm/hugetlb.c | 32 +++++++++++++++++++++++++++++++-
mm/memory.c | 27 +++++++++++++++++++++++++--
mm/userfaultfd.c | 19 ++++++++++++++++++-
6 files changed, 114 insertions(+), 6 deletions(-)
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index d8f5f400c8ef..43b2fb587ce3 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -278,6 +278,7 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
extern bool userfaultfd_wp_async(struct vm_area_struct *vma);
+extern bool userfaultfd_rwp_async(struct vm_area_struct *vma);
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
{
@@ -456,6 +457,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+static inline bool userfaultfd_rwp_async(struct vm_area_struct *vma)
+{
+ return false;
+}
+
static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
{
return false;
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index d803e76d47ad..c10f08f8a618 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -44,7 +44,8 @@
UFFD_FEATURE_POISON | \
UFFD_FEATURE_WP_ASYNC | \
UFFD_FEATURE_MOVE | \
- UFFD_FEATURE_RWP)
+ UFFD_FEATURE_RWP | \
+ UFFD_FEATURE_RWP_ASYNC)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -243,6 +244,13 @@ struct uffdio_api {
* UFFDIO_REGISTER_MODE_RWP for read-write protection tracking.
* Pages are made inaccessible via UFFDIO_RWPROTECT and faults
* are delivered when the pages are re-accessed.
+ *
+ * UFFD_FEATURE_RWP_ASYNC indicates asynchronous mode for
+ * UFFDIO_REGISTER_MODE_RWP. When set, faults on read-write
+ * protected pages are auto-resolved by the kernel (PTE
+ * permissions restored immediately) without delivering a message
+ * to the userfaultfd handler. Use PAGEMAP_SCAN with inverted
+ * PAGE_IS_ACCESSED to find pages that were not re-accessed.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -262,6 +270,7 @@ struct uffdio_api {
#define UFFD_FEATURE_WP_ASYNC (1<<15)
#define UFFD_FEATURE_MOVE (1<<16)
#define UFFD_FEATURE_RWP (1<<17)
+#define UFFD_FEATURE_RWP_ASYNC (1<<18)
__u64 features;
__u64 ioctls;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cd32bd51e311..803fbc41e501 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2291,7 +2291,30 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
vm_fault_t do_huge_pmd_uffd_rwp(struct vm_fault *vmf)
{
- return handle_userfault(vmf, VM_UFFD_RWP);
+ struct vm_area_struct *vma = vmf->vma;
+ pmd_t pmd;
+
+ if (!userfaultfd_rwp_async(vma))
+ return handle_userfault(vmf, VM_UFFD_RWP);
+
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) {
+ spin_unlock(vmf->ptl);
+ return 0;
+ }
+ pmd = pmd_modify(vmf->orig_pmd, vma->vm_page_prot);
+ /* pmd_modify() preserves _PAGE_UFFD; drop it on resolution */
+ pmd = pmd_clear_uffd(pmd);
+ pmd = pmd_mkyoung(pmd);
+ if (!pmd_write(pmd) &&
+ vma_wants_manual_pte_write_upgrade(vma) &&
+ can_change_pmd_writable(vma, vmf->address, pmd))
+ pmd = pmd_mkwrite(pmd, vma);
+ set_pmd_at(vma->vm_mm, vmf->address & HPAGE_PMD_MASK,
+ vmf->pmd, pmd);
+ update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ spin_unlock(vmf->ptl);
+ return 0;
}
/* NUMA hinting page fault entry point for trans huge pmds */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f63718296cc2..a5ff9018af06 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6070,7 +6070,37 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (pte_protnone(vmf.orig_pte) && vma_is_accessible(vma) &&
userfaultfd_rwp(vma) && huge_pte_uffd(vmf.orig_pte)) {
- return hugetlb_handle_userfault(&vmf, mapping, VM_UFFD_RWP);
+ spinlock_t *ptl;
+ pte_t pte;
+
+ /* Sync: drop hugetlb locks before blocking in handle_userfault() */
+ if (!userfaultfd_rwp_async(vma))
+ return hugetlb_handle_userfault(&vmf, mapping, VM_UFFD_RWP);
+
+ ptl = huge_pte_lock(h, mm, vmf.pte);
+ pte = huge_ptep_get(mm, vmf.address, vmf.pte);
+ if (pte_protnone(pte) && huge_pte_uffd(pte)) {
+ unsigned int shift = huge_page_shift(h);
+
+ pte = huge_pte_modify(pte, vma->vm_page_prot);
+ pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
+ /* huge_pte_modify() preserves _PAGE_UFFD; drop it on resolution */
+ pte = huge_pte_clear_uffd(pte);
+ pte = pte_mkyoung(pte);
+ /*
+ * Unlike do_uffd_rwp(), do not upgrade to writable
+ * here. Hugetlb lacks a can_change_huge_pte_writable()
+ * equivalent, so a write access will take a separate
+ * COW fault — acceptable for the rare private hugetlb
+ * case.
+ */
+ set_huge_pte_at(mm, vmf.address, vmf.pte, pte,
+ huge_page_size(h));
+ update_mmu_cache(vma, vmf.address, vmf.pte);
+ }
+ spin_unlock(ptl);
+ ret = 0;
+ goto out_mutex;
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 3e393881031d..89c9a44d07ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6137,8 +6137,31 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
static vm_fault_t do_uffd_rwp(struct vm_fault *vmf)
{
- pte_unmap(vmf->pte);
- return handle_userfault(vmf, VM_UFFD_RWP);
+ pte_t pte;
+
+ if (!userfaultfd_rwp_async(vmf->vma)) {
+ /* Sync mode: unmap PTE and deliver to userfaultfd handler */
+ pte_unmap(vmf->pte);
+ return handle_userfault(vmf, VM_UFFD_RWP);
+ }
+
+ spin_lock(vmf->ptl);
+ if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
+ pte = pte_modify(vmf->orig_pte, vmf->vma->vm_page_prot);
+ /* pte_modify() preserves _PAGE_UFFD; drop it on resolution */
+ pte = pte_clear_uffd(pte);
+ pte = pte_mkyoung(pte);
+ if (!pte_write(pte) &&
+ vma_wants_manual_pte_write_upgrade(vmf->vma) &&
+ can_change_pte_writable(vmf->vma, vmf->address, pte))
+ pte = pte_mkwrite(pte, vmf->vma);
+ set_pte_at(vmf->vma->vm_mm, vmf->address, vmf->pte, pte);
+ update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
}
static vm_fault_t do_numa_page(struct vm_fault *vmf)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index b966df47800c..20478bb37311 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -2478,6 +2478,11 @@ static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx)
return ctx && (ctx->features & UFFD_FEATURE_WP_ASYNC);
}
+static bool userfaultfd_rwp_async_ctx(struct userfaultfd_ctx *ctx)
+{
+ return ctx && (ctx->features & UFFD_FEATURE_RWP_ASYNC);
+}
+
/*
* Whether WP_UNPOPULATED is enabled on the uffd context. It is only
* meaningful when userfaultfd_wp()==true on the vma and when it's
@@ -4379,6 +4384,11 @@ bool userfaultfd_wp_async(struct vm_area_struct *vma)
return userfaultfd_wp_async_ctx(vma->vm_userfaultfd_ctx.ctx);
}
+bool userfaultfd_rwp_async(struct vm_area_struct *vma)
+{
+ return userfaultfd_rwp_async_ctx(vma->vm_userfaultfd_ctx.ctx);
+}
+
static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
@@ -4482,6 +4492,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
if (features & UFFD_FEATURE_WP_ASYNC)
features |= UFFD_FEATURE_WP_UNPOPULATED;
+ ret = -EINVAL;
+ /* RWP_ASYNC requires RWP */
+ if ((features & UFFD_FEATURE_RWP_ASYNC) &&
+ !(features & UFFD_FEATURE_RWP))
+ goto err_out;
+
/* report all available features and ioctls to userland */
uffdio_api.features = UFFD_API_FEATURES;
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
@@ -4504,7 +4520,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
* but not actually usable.
*/
if (VM_UFFD_RWP == VM_NONE || !pgtable_supports_uffd())
- uffdio_api.features &= ~UFFD_FEATURE_RWP;
+ uffdio_api.features &=
+ ~(UFFD_FEATURE_RWP | UFFD_FEATURE_RWP_ASYNC);
ret = -EINVAL;
if (features & ~uffdio_api.features)
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread