The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Mike Rapoport <rppt@kernel.org>
To: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
Cc: akpm@linux-foundation.org, peterx@redhat.com, david@kernel.org,
	ljs@kernel.org, surenb@google.com, vbabka@kernel.org,
	Liam.Howlett@oracle.com, ziy@nvidia.com, corbet@lwn.net,
	skhan@linuxfoundation.org, seanjc@google.com,
	pbonzini@redhat.com, jthoughton@google.com, aarcange@redhat.com,
	sj@kernel.org, usama.arif@linux.dev, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kselftest@vger.kernel.org, kvm@vger.kernel.org,
	kernel-team@meta.com
Subject: Re: [PATCH v2 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag
Date: Tue, 12 May 2026 19:45:07 +0300	[thread overview]
Message-ID: <agNZE49m8Pkn8CeW@kernel.org> (raw)
In-Reply-To: <ff3420fdd75f58d56827ff3d2eaffc0d74154627.1778254670.git.kas@kernel.org>

On Fri, May 08, 2026 at 04:55:17PM +0100, Kiryl Shutsemau (Meta) wrote:
> Preparatory patch. Add the change_protection() primitive that
> userfaultfd RWP will use.
> 
> An RWP-protected PTE is PAGE_NONE with the uffd PTE bit set. The
> PROT_NONE half makes the CPU fault on any access; the uffd bit
> distinguishes an RWP fault from a plain mprotect(PROT_NONE) or NUMA
> hinting fault. MM_CP_UFFD_WP and MM_CP_UFFD_RWP share the same PTE
> bit, so the two cannot be used together on the same range.
> 
> Two new change_protection() flags:
> 
>   MM_CP_UFFD_RWP            install PAGE_NONE and set the uffd bit
>   MM_CP_UFFD_RWP_RESOLVE    restore vma->vm_page_prot, clear the uffd bit
> 
> Both are wired through change_pte_range(), change_huge_pmd(), and
> hugetlb_change_protection() so anon, shmem, THP, and hugetlb all
> share the same semantics.
> 
> Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> Assisted-by: Claude:claude-opus-4-6
> ---
>  include/linux/mm.h            |  5 +++++
>  include/linux/userfaultfd_k.h |  1 -
>  mm/huge_memory.c              | 20 ++++++++++++------
>  mm/hugetlb.c                  | 25 ++++++++++++++++------
>  mm/mprotect.c                 | 40 +++++++++++++++++++++++++++++------
>  5 files changed, 71 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 3f53d1e978c0..2b65416bb760 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3291,6 +3291,11 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
>  #define  MM_CP_UFFD_WP_RESOLVE             (1UL << 3) /* Resolve wp */
>  #define  MM_CP_UFFD_WP_ALL                 (MM_CP_UFFD_WP | \
>  					    MM_CP_UFFD_WP_RESOLVE)
> +/* Whether this change is for uffd RWP */
> +#define  MM_CP_UFFD_RWP                    (1UL << 4) /* do rwp */
> +#define  MM_CP_UFFD_RWP_RESOLVE            (1UL << 5) /* Resolve rwp */

Nit: any reason except copy/paste to use different case in "do rwp" and
"Resolve rwp"? ;-)

> +#define  MM_CP_UFFD_RWP_ALL                (MM_CP_UFFD_RWP | \
> +					    MM_CP_UFFD_RWP_RESOLVE)
>  
>  bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>  			     pte_t pte);
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index fcf308dba311..3725e61a7041 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -397,7 +397,6 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
>  	return false;
>  }
>  
> -
>  static inline bool userfaultfd_armed(struct vm_area_struct *vma)
>  {
>  	return false;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index d88fcccd386d..2537dca63c6c 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2665,6 +2665,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	spinlock_t *ptl;
>  	pmd_t oldpmd, entry;
>  	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
> +	bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
> +	bool uffd_rwp_resolve = cp_flags & MM_CP_UFFD_RWP_RESOLVE;
>  	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>  	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

It looks like uffd_wp* are always ORed with the matching uffd_rwp*, so we
could fold this to e.g.

	bool uffd_prot = cp_flags & (MM_CP_UFFD_WP | MM_CP_UFFD_RWP);

>  	int ret = 1;
> @@ -2679,11 +2681,18 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		return 0;
>  
>  	if (thp_migration_supported() && pmd_is_valid_softleaf(*pmd)) {
> -		change_non_present_huge_pmd(mm, addr, pmd, uffd_wp,
> -					    uffd_wp_resolve);
> +		change_non_present_huge_pmd(mm, addr, pmd,
> +					    uffd_wp || uffd_rwp,
> +					    uffd_wp_resolve || uffd_rwp_resolve);
>  		goto unlock;
>  	}
>  
> +	/* Already in the desired state */
> +	if (prot_numa && pmd_protnone(*pmd))
> +		goto unlock;
> +	if (uffd_rwp && pmd_protnone(*pmd) && pmd_uffd(*pmd))
> +		goto unlock;
> +
>  	if (prot_numa) {
>  
>  		/*
> @@ -2694,9 +2703,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		if (is_huge_zero_pmd(*pmd))
>  			goto unlock;
>  
> -		if (pmd_protnone(*pmd))
> -			goto unlock;
> -
>  		if (!folio_can_map_prot_numa(pmd_folio(*pmd), vma,
>  					     vma_is_single_threaded_private(vma)))
>  			goto unlock;
> @@ -2725,9 +2731,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
>  
>  	entry = pmd_modify(oldpmd, newprot);
> -	if (uffd_wp)
> +	if (uffd_wp || uffd_rwp)
>  		entry = pmd_mkuffd(entry);
> -	else if (uffd_wp_resolve)
> +	else if (uffd_wp_resolve || uffd_rwp_resolve)
>  		/*
>  		 * Leave the write bit to be handled by PF interrupt
>  		 * handler, then things like COW could be properly
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 61cda9992043..63f6b19418b9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6434,6 +6436,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
>  
>  		ptep = hugetlb_walk(vma, address, psize);
>  		if (!ptep) {
> +			/*
> +			 * uffd_wp installs a pte marker on the unpopulated
> +			 * entry; RWP does not install markers so the

Nit:                              uffd_rwp

> +			 * allocation is unnecessary for it.
> +			 */
>  			if (!uffd_wp) {
>  				address |= last_addr_mask;
>  				continue;
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 8340c8b228c6..23e71f68cf7a 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -216,6 +216,8 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  {
>  	const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>  	const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
> +	const bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
> +	const bool uffd_rwp_resolve = cp_flags & MM_CP_UFFD_RWP_RESOLVE;

And here a single pair of bools should be enough I think.

>  	softleaf_t entry = softleaf_from_pte(oldpte);
>  	pte_t newpte;
>  
> @@ -256,7 +258,7 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  		 * to unprotect it, drop it; the next page
>  		 * fault will trigger without uffd trapping.
>  		 */
> -		if (uffd_wp_resolve) {
> +		if (uffd_wp_resolve || uffd_rwp_resolve) {
>  			pte_clear(vma->vm_mm, addr, pte);
>  			return 1;
>  		}
> @@ -265,9 +267,9 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  		newpte = oldpte;
>  	}
>  
> -	if (uffd_wp)
> +	if (uffd_wp || uffd_rwp)
>  		newpte = pte_swp_mkuffd(newpte);
> -	else if (uffd_wp_resolve)
> +	else if (uffd_wp_resolve || uffd_rwp_resolve)
>  		newpte = pte_swp_clear_uffd(newpte);
>  
>  	if (!pte_same(oldpte, newpte)) {

-- 
Sincerely yours,
Mike.

  reply	other threads:[~2026-05-12 16:45 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-08 15:55 [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 02/14] mm: rename uffd-wp PTE bit macros to uffd Kiryl Shutsemau (Meta)
2026-05-08 23:52   ` SeongJae Park
2026-05-08 15:55 ` [PATCH v2 03/14] mm: rename uffd-wp PTE accessors " Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 04/14] mm: add VM_UFFD_RWP VMA flag Kiryl Shutsemau (Meta)
2026-05-12 16:48   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag Kiryl Shutsemau (Meta)
2026-05-12 16:45   ` Mike Rapoport [this message]
2026-05-08 15:55 ` [PATCH v2 06/14] mm: preserve RWP marker across PTE rewrites Kiryl Shutsemau (Meta)
2026-05-12 16:59   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP Kiryl Shutsemau (Meta)
2026-05-12 17:00   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing Kiryl Shutsemau (Meta)
2026-05-12 17:20   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP Kiryl Shutsemau (Meta)
2026-05-12 17:29   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking Kiryl Shutsemau (Meta)
2026-05-12 17:41   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution Kiryl Shutsemau (Meta)
2026-05-12 18:05   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 12/14] userfaultfd: add UFFDIO_SET_MODE for runtime sync/async toggle Kiryl Shutsemau (Meta)
2026-05-12 18:11   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 13/14] selftests/mm: add userfaultfd RWP tests Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 14/14] Documentation/userfaultfd: document RWP working set tracking Kiryl Shutsemau (Meta)
2026-05-08 17:32 ` [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Andrew Morton
2026-05-08 22:48   ` Kiryl Shutsemau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=agNZE49m8Pkn8CeW@kernel.org \
    --to=rppt@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=jthoughton@google.com \
    --cc=kas@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=seanjc@google.com \
    --cc=sj@kernel.org \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=usama.arif@linux.dev \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox