From: Andrew Morton <akpm@linux-foundation.org>
To: Pedro Falcato <pfalcato@suse.de>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Lorenzo Stoakes <ljs@kernel.org>,
	Vlastimil Babka <vbabka@kernel.org>, Jann Horn <jannh@google.com>,
	David Hildenbrand <david@kernel.org>, Dev Jain <dev.jain@arm.com>,
	Luke Yang <luyang@redhat.com>,
	jhladky@redhat.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 0/2] mm/mprotect: micro-optimization work
Date: Thu, 2 Apr 2026 11:33:28 -0700
Message-ID: <20260402113328.fd0a6f0e28cf74a651fa2291@linux-foundation.org>
In-Reply-To: <20260402141628.3367596-1-pfalcato@suse.de>

On Thu,  2 Apr 2026 15:16:26 +0100 Pedro Falcato <pfalcato@suse.de> wrote:

> Micro-optimize the change_protection() functionality and the
> change_pte_range() routine. This set of functions works in an
> incredibly tight loop, and even small inefficiencies become glaring
> when run hundreds, thousands, or hundreds of thousands of times.

Thanks, I updated mm.git's mm-unstable branch to this version.

The update is rather large.  If people think it best to hold this work
over until the next -rc1, please advise.

> 
> v3:
>  - Collapse a few lines into a single line in patch 1 (David)
>  - Bring the inlining to a higher level (David)
>  - Pick up David's patch 1 ACK (thank you!)
>  - Pick up Luke Yang's Tested-by (thank you!)
>  - Add Luke's results and akpmify the Links: a bit (cover letter)

Here's how v3 altered mm.git:

 mm/mprotect.c |   98 +++++++++++++++++++++++++++---------------------
 1 file changed, 56 insertions(+), 42 deletions(-)

--- a/mm/mprotect.c~b
+++ a/mm/mprotect.c
@@ -103,7 +103,7 @@ bool can_change_pte_writable(struct vm_a
 	return can_change_shared_pte_writable(vma, pte);
 }
 
-static __always_inline int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
+static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
 				    pte_t pte, int max_nr_ptes, fpb_t flags)
 {
 	/* No underlying folio, so cannot batch */
@@ -143,7 +143,7 @@ static __always_inline void prot_commit_
  * !PageAnonExclusive() pages, starting from start_idx. Caller must enforce
  * that the ptes point to consecutive pages of the same anon large folio.
  */
-static int page_anon_exclusive_sub_batch(int start_idx, int max_len,
+static __always_inline int page_anon_exclusive_sub_batch(int start_idx, int max_len,
 		struct page *first_page, bool expected_anon_exclusive)
 {
 	int idx;
@@ -177,13 +177,6 @@ static __always_inline void commit_anon_
 	int sub_batch_idx = 0;
 	int len;
 
-	/* Optimize for the common order-0 case. */
-	if (likely(nr_ptes == 1)) {
-		prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, 1,
-				       0, PageAnonExclusive(first_page), tlb);
-		return;
-	}
-
 	while (nr_ptes) {
 		expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx);
 		len = page_anon_exclusive_sub_batch(sub_batch_idx, nr_ptes,
@@ -195,7 +188,7 @@ static __always_inline void commit_anon_
 	}
 }
 
-static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
+static __always_inline void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
 		struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep,
 		pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
 {
@@ -234,8 +227,7 @@ static long change_softleaf_pte(struct v
 		 * just be safe and disable write
 		 */
 		if (folio_test_anon(folio))
-			entry = make_readable_exclusive_migration_entry(
-					     swp_offset(entry));
+			entry = make_readable_exclusive_migration_entry(swp_offset(entry));
 		else
 			entry = make_readable_migration_entry(swp_offset(entry));
 		newpte = swp_entry_to_pte(entry);
@@ -246,8 +238,7 @@ static long change_softleaf_pte(struct v
 		 * We do not preserve soft-dirtiness. See
 		 * copy_nonpresent_pte() for explanation.
 		 */
-		entry = make_readable_device_private_entry(
-					swp_offset(entry));
+		entry = make_readable_device_private_entry(swp_offset(entry));
 		newpte = swp_entry_to_pte(entry);
 		if (pte_swp_uffd_wp(oldpte))
 			newpte = pte_swp_mkuffd_wp(newpte);
@@ -286,6 +277,45 @@ static long change_softleaf_pte(struct v
 	return 0;
 }
 
+static __always_inline void change_present_ptes(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+		int nr_ptes, unsigned long end, pgprot_t newprot,
+		struct folio *folio, struct page *page, unsigned long cp_flags)
+{
+	const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	const bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+	pte_t ptent, oldpte;
+
+	oldpte = modify_prot_start_ptes(vma, addr, ptep, nr_ptes);
+	ptent = pte_modify(oldpte, newprot);
+
+	if (uffd_wp)
+		ptent = pte_mkuffd_wp(ptent);
+	else if (uffd_wp_resolve)
+		ptent = pte_clear_uffd_wp(ptent);
+
+	/*
+	 * In some writable, shared mappings, we might want
+	 * to catch actual write access -- see
+	 * vma_wants_writenotify().
+	 *
+	 * In all writable, private mappings, we have to
+	 * properly handle COW.
+	 *
+	 * In both cases, we can sometimes still change PTEs
+	 * writable and avoid the write-fault handler, for
+	 * example, if a PTE is already dirty and no other
+	 * COW or special handling is required.
+	 */
+	if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
+	     !pte_write(ptent))
+		set_write_prot_commit_flush_ptes(vma, folio, page,
+			addr, ptep, oldpte, ptent, nr_ptes, tlb);
+	else
+		prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent,
+			nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
+}
+
 static long change_pte_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -296,7 +326,6 @@ static long change_pte_range(struct mmu_
 	bool is_private_single_threaded;
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
-	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 	int nr_ptes;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
@@ -317,7 +346,6 @@ static long change_pte_range(struct mmu_
 			int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
 			struct folio *folio = NULL;
 			struct page *page;
-			pte_t ptent;
 
 			/* Already in the desired state. */
 			if (prot_numa && pte_protnone(oldpte))
@@ -343,34 +371,20 @@ static long change_pte_range(struct mmu_
 
 			nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags);
 
-			oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes);
-			ptent = pte_modify(oldpte, newprot);
-
-			if (uffd_wp)
-				ptent = pte_mkuffd_wp(ptent);
-			else if (uffd_wp_resolve)
-				ptent = pte_clear_uffd_wp(ptent);
-
 			/*
-			 * In some writable, shared mappings, we might want
-			 * to catch actual write access -- see
-			 * vma_wants_writenotify().
-			 *
-			 * In all writable, private mappings, we have to
-			 * properly handle COW.
-			 *
-			 * In both cases, we can sometimes still change PTEs
-			 * writable and avoid the write-fault handler, for
-			 * example, if a PTE is already dirty and no other
-			 * COW or special handling is required.
+			 * Optimize for the small-folio common case by
+			 * special-casing it here. Compiler constant propagation
+			 * plus copious amounts of __always_inline does wonders.
 			 */
-			if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
-			     !pte_write(ptent))
-				set_write_prot_commit_flush_ptes(vma, folio, page,
-				addr, pte, oldpte, ptent, nr_ptes, tlb);
-			else
-				prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
-					nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
+			if (likely(nr_ptes == 1)) {
+				change_present_ptes(tlb, vma, addr, pte, 1,
+					end, newprot, folio, page, cp_flags);
+			} else {
+				change_present_ptes(tlb, vma, addr, pte,
+					nr_ptes, end, newprot, folio, page,
+					cp_flags);
+			}
+
 			pages += nr_ptes;
 		} else if (pte_none(oldpte)) {
 			/*
_
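
A note for readers skimming the diff: the nr_ptes == 1 special case
above only pays off because change_present_ptes() and its helpers are
marked __always_inline, which lets the compiler constant-propagate the
literal 1 and fold the batch handling away on the common order-0 path.
As a minimal userspace sketch of that pattern (hypothetical names, not
kernel code):

	#include <stdio.h>

	#define __always_inline inline __attribute__((always_inline))

	/* Worker that handles a batch of nr entries. */
	static __always_inline void process_range(int *vals, int nr)
	{
		/* When nr is the literal constant 1, this loop folds away. */
		for (int i = 0; i < nr; i++)
			vals[i] += 1;
	}

	static void process(int *vals, int nr)
	{
		if (nr == 1)
			process_range(vals, 1);		/* compile-time constant */
		else
			process_range(vals, nr);	/* runtime batch size */
	}

	int main(void)
	{
		int vals[4] = { 1, 2, 3, 4 };

		process(vals, 1);	/* specialized path */
		process(vals, 4);	/* generic batched path */
		printf("%d %d\n", vals[0], vals[3]);
		return 0;
	}

The branch looks redundant at the C level, but it hands the optimizer
two distinct inlined call sites, one of which is fully specialized for
the order-0 case.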




Thread overview: 16+ messages
2026-04-02 14:16 [PATCH v3 0/2] mm/mprotect: micro-optimization work Pedro Falcato
2026-04-02 14:16 ` [PATCH v3 1/2] mm/mprotect: move softleaf code out of the main function Pedro Falcato
2026-04-07  9:58   ` Vlastimil Babka (SUSE)
2026-04-02 14:16 ` [PATCH v3 2/2] mm/mprotect: special-case small folios when applying write permissions Pedro Falcato
2026-04-02 14:21   ` Pedro Falcato
2026-04-02 18:29     ` Andrew Morton
2026-04-06  8:28       ` Pedro Falcato
2026-04-07  0:50   ` Davidlohr Bueso
2026-04-07  8:21     ` Pedro Falcato
2026-04-07 12:31   ` Vlastimil Babka (SUSE)
2026-04-07 14:01     ` Pedro Falcato
2026-04-02 18:33 ` Andrew Morton [this message]
2026-04-06  8:29   ` [PATCH v3 0/2] mm/mprotect: micro-optimization work Pedro Falcato
2026-04-06  9:09     ` David Hildenbrand (arm)
2026-04-06 17:38       ` Andrew Morton
2026-04-07 10:08         ` Lorenzo Stoakes (Oracle)
