All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Rapoport <rppt@kernel.org>
To: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
Cc: akpm@linux-foundation.org, peterx@redhat.com, david@kernel.org,
	ljs@kernel.org, surenb@google.com, vbabka@kernel.org,
	Liam.Howlett@oracle.com, ziy@nvidia.com, corbet@lwn.net,
	skhan@linuxfoundation.org, seanjc@google.com,
	pbonzini@redhat.com, jthoughton@google.com, aarcange@redhat.com,
	sj@kernel.org, usama.arif@linux.dev, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kselftest@vger.kernel.org, kvm@vger.kernel.org,
	kernel-team@meta.com
Subject: Re: [PATCH v2 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag
Date: Tue, 12 May 2026 19:45:07 +0300	[thread overview]
Message-ID: <agNZE49m8Pkn8CeW@kernel.org> (raw)
In-Reply-To: <ff3420fdd75f58d56827ff3d2eaffc0d74154627.1778254670.git.kas@kernel.org>

On Fri, May 08, 2026 at 04:55:17PM +0100, Kiryl Shutsemau (Meta) wrote:
> Preparatory patch. Add the change_protection() primitive that
> userfaultfd RWP will use.
> 
> An RWP-protected PTE is PAGE_NONE with the uffd PTE bit set. The
> PROT_NONE half makes the CPU fault on any access; the uffd bit
> distinguishes an RWP fault from a plain mprotect(PROT_NONE) or NUMA
> hinting fault. MM_CP_UFFD_WP and MM_CP_UFFD_RWP share the same PTE
> bit, so the two cannot be used together on the same range.
> 
> Two new change_protection() flags:
> 
>   MM_CP_UFFD_RWP            install PAGE_NONE and set the uffd bit
>   MM_CP_UFFD_RWP_RESOLVE    restore vma->vm_page_prot, clear the uffd bit
> 
> Both are wired through change_pte_range(), change_huge_pmd(), and
> hugetlb_change_protection() so anon, shmem, THP, and hugetlb all
> share the same semantics.
> 
> Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> Assisted-by: Claude:claude-opus-4-6
> ---
>  include/linux/mm.h            |  5 +++++
>  include/linux/userfaultfd_k.h |  1 -
>  mm/huge_memory.c              | 20 ++++++++++++------
>  mm/hugetlb.c                  | 25 ++++++++++++++++------
>  mm/mprotect.c                 | 40 +++++++++++++++++++++++++++++------
>  5 files changed, 71 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 3f53d1e978c0..2b65416bb760 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3291,6 +3291,11 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
>  #define  MM_CP_UFFD_WP_RESOLVE             (1UL << 3) /* Resolve wp */
>  #define  MM_CP_UFFD_WP_ALL                 (MM_CP_UFFD_WP | \
>  					    MM_CP_UFFD_WP_RESOLVE)
> +/* Whether this change is for uffd RWP */
> +#define  MM_CP_UFFD_RWP                    (1UL << 4) /* do rwp */
> +#define  MM_CP_UFFD_RWP_RESOLVE            (1UL << 5) /* Resolve rwp */

Nit: any reason except copy/paste to use different case in "do rwp" and
"Resolve rwp"? ;-)

> +#define  MM_CP_UFFD_RWP_ALL                (MM_CP_UFFD_RWP | \
> +					    MM_CP_UFFD_RWP_RESOLVE)
>  
>  bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>  			     pte_t pte);
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index fcf308dba311..3725e61a7041 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -397,7 +397,6 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
>  	return false;
>  }
>  
> -
>  static inline bool userfaultfd_armed(struct vm_area_struct *vma)
>  {
>  	return false;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index d88fcccd386d..2537dca63c6c 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2665,6 +2665,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	spinlock_t *ptl;
>  	pmd_t oldpmd, entry;
>  	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
> +	bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
> +	bool uffd_rwp_resolve = cp_flags & MM_CP_UFFD_RWP_RESOLVE;
>  	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>  	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;

It looks like uffd_wp* are always ORed with uffd_rwp*, so we could fold
them, e.g.

	bool uffd_prot = cp_flags & (MM_CP_UFFD_WP | MM_CP_UFFD_RWP);

>  	int ret = 1;
> @@ -2679,11 +2681,18 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		return 0;
>  
>  	if (thp_migration_supported() && pmd_is_valid_softleaf(*pmd)) {
> -		change_non_present_huge_pmd(mm, addr, pmd, uffd_wp,
> -					    uffd_wp_resolve);
> +		change_non_present_huge_pmd(mm, addr, pmd,
> +					    uffd_wp || uffd_rwp,
> +					    uffd_wp_resolve || uffd_rwp_resolve);
>  		goto unlock;
>  	}
>  
> +	/* Already in the desired state */
> +	if (prot_numa && pmd_protnone(*pmd))
> +		goto unlock;
> +	if (uffd_rwp && pmd_protnone(*pmd) && pmd_uffd(*pmd))
> +		goto unlock;
> +
>  	if (prot_numa) {
>  
>  		/*
> @@ -2694,9 +2703,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  		if (is_huge_zero_pmd(*pmd))
>  			goto unlock;
>  
> -		if (pmd_protnone(*pmd))
> -			goto unlock;
> -
>  		if (!folio_can_map_prot_numa(pmd_folio(*pmd), vma,
>  					     vma_is_single_threaded_private(vma)))
>  			goto unlock;
> @@ -2725,9 +2731,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
>  
>  	entry = pmd_modify(oldpmd, newprot);
> -	if (uffd_wp)
> +	if (uffd_wp || uffd_rwp)
>  		entry = pmd_mkuffd(entry);
> -	else if (uffd_wp_resolve)
> +	else if (uffd_wp_resolve || uffd_rwp_resolve)
>  		/*
>  		 * Leave the write bit to be handled by PF interrupt
>  		 * handler, then things like COW could be properly
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 61cda9992043..63f6b19418b9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6434,6 +6436,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
>  
>  		ptep = hugetlb_walk(vma, address, psize);
>  		if (!ptep) {
> +			/*
> +			 * uffd_wp installs a pte marker on the unpopulated
> +			 * entry; RWP does not install markers so the

Nit:                              uffd_rwp

> +			 * allocation is unnecessary for it.
> +			 */
>  			if (!uffd_wp) {
>  				address |= last_addr_mask;
>  				continue;
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 8340c8b228c6..23e71f68cf7a 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -216,6 +216,8 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  {
>  	const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>  	const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
> +	const bool uffd_rwp = cp_flags & MM_CP_UFFD_RWP;
> +	const bool uffd_rwp_resolve = cp_flags & MM_CP_UFFD_RWP_RESOLVE;

And here a single pair of bools should be enough I think.

>  	softleaf_t entry = softleaf_from_pte(oldpte);
>  	pte_t newpte;
>  
> @@ -256,7 +258,7 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  		 * to unprotect it, drop it; the next page
>  		 * fault will trigger without uffd trapping.
>  		 */
> -		if (uffd_wp_resolve) {
> +		if (uffd_wp_resolve || uffd_rwp_resolve) {
>  			pte_clear(vma->vm_mm, addr, pte);
>  			return 1;
>  		}
> @@ -265,9 +267,9 @@ static long change_softleaf_pte(struct vm_area_struct *vma,
>  		newpte = oldpte;
>  	}
>  
> -	if (uffd_wp)
> +	if (uffd_wp || uffd_rwp)
>  		newpte = pte_swp_mkuffd(newpte);
> -	else if (uffd_wp_resolve)
> +	else if (uffd_wp_resolve || uffd_rwp_resolve)
>  		newpte = pte_swp_clear_uffd(newpte);
>  
>  	if (!pte_same(oldpte, newpte)) {

-- 
Sincerely yours,
Mike.

  reply	other threads:[~2026-05-12 16:45 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-08 15:55 [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 02/14] mm: rename uffd-wp PTE bit macros to uffd Kiryl Shutsemau (Meta)
2026-05-08 23:52   ` SeongJae Park
2026-05-08 15:55 ` [PATCH v2 03/14] mm: rename uffd-wp PTE accessors " Kiryl Shutsemau (Meta)
2026-05-14  1:31   ` SeongJae Park
2026-05-08 15:55 ` [PATCH v2 04/14] mm: add VM_UFFD_RWP VMA flag Kiryl Shutsemau (Meta)
2026-05-12 16:48   ` Mike Rapoport
2026-05-15  0:29   ` SeongJae Park
2026-05-08 15:55 ` [PATCH v2 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag Kiryl Shutsemau (Meta)
2026-05-12 16:45   ` Mike Rapoport [this message]
2026-05-08 15:55 ` [PATCH v2 06/14] mm: preserve RWP marker across PTE rewrites Kiryl Shutsemau (Meta)
2026-05-12 16:59   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP Kiryl Shutsemau (Meta)
2026-05-12 17:00   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing Kiryl Shutsemau (Meta)
2026-05-12 17:20   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP Kiryl Shutsemau (Meta)
2026-05-12 17:29   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking Kiryl Shutsemau (Meta)
2026-05-12 17:41   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution Kiryl Shutsemau (Meta)
2026-05-12 18:05   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 12/14] userfaultfd: add UFFDIO_SET_MODE for runtime sync/async toggle Kiryl Shutsemau (Meta)
2026-05-12 18:11   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 13/14] selftests/mm: add userfaultfd RWP tests Kiryl Shutsemau (Meta)
2026-05-13  6:06   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 14/14] Documentation/userfaultfd: document RWP working set tracking Kiryl Shutsemau (Meta)
2026-05-13  6:26   ` Mike Rapoport
2026-05-08 17:32 ` [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Andrew Morton
2026-05-08 22:48   ` Kiryl Shutsemau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=agNZE49m8Pkn8CeW@kernel.org \
    --to=rppt@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=jthoughton@google.com \
    --cc=kas@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=seanjc@google.com \
    --cc=sj@kernel.org \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=usama.arif@linux.dev \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.