From: Dev Jain <dev.jain@arm.com>
To: akpm@linux-foundation.org
Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	catalin.marinas@arm.com, will@kernel.org,
	Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com,
	vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com,
	peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com,
	baohua@kernel.org, kevin.brodsky@arm.com,
	quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu,
	yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org,
	hughd@google.com, yang@os.amperecomputing.com, ziy@nvidia.com,
	Dev Jain <dev.jain@arm.com>
Subject: [PATCH v5 2/7] mm: Optimize mprotect() for MM_CP_PROT_NUMA by batch-skipping PTEs
Date: Fri, 18 Jul 2025 14:32:39 +0530
Message-ID: <20250718090244.21092-3-dev.jain@arm.com>
In-Reply-To: <20250718090244.21092-1-dev.jain@arm.com>

For the MM_CP_PROT_NUMA skipping case, observe that if an iteration is
skipped because the underlying folio satisfies one of the skip
conditions, then the iteration will also be skipped for every subsequent
PTE that maps the same folio. Therefore, we can optimize by using
folio_pte_batch() to batch-skip those iterations.
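
In pseudocode, the PTE loop in change_pte_range() then advances by whole
batches. This is only a simplified sketch of the control flow in the diff
below; declarations and the non-NUMA handling are elided:

	do {
		nr_ptes = 1;
		oldpte = ptep_get(pte);
		if (pte_present(oldpte) && prot_numa &&
		    prot_numa_skip(vma, addr, oldpte, pte, target_node, &folio)) {
			/* every later PTE of this folio would be skipped too */
			nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte,
							   max_nr_ptes);
			continue;
		}
		/* ... change protection for this single PTE as before ... */
	} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);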

Use prot_numa_skip(), introduced in the previous patch, to determine
whether the iteration needs to be skipped. Change its signature to take a
double pointer to a folio, through which it returns the folio it looked
up; mprotect_folio_pte_batch() then uses that folio to determine how many
iterations can safely be skipped.
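
The resulting contract can be summarized as follows (a sketch only; the
actual definition is in the diff below):

	/*
	 * Return true if change_pte_range() should skip this PTE for
	 * MM_CP_PROT_NUMA. On return, *foliop is set to the folio backing
	 * the PTE if one was looked up, or NULL otherwise, so that the
	 * caller can pass it to mprotect_folio_pte_batch() to skip the
	 * whole batch at once.
	 */
	static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
				   pte_t oldpte, pte_t *pte, int target_node,
				   struct folio **foliop);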

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/mprotect.c | 55 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2a9c73bd0778..97adc62c50ab 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -83,28 +83,43 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
 	return pte_dirty(pte);
 }
 
+static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
+				    pte_t pte, int max_nr_ptes)
+{
+	/* No underlying folio, so cannot batch */
+	if (!folio)
+		return 1;
+
+	if (!folio_test_large(folio))
+		return 1;
+
+	return folio_pte_batch(folio, ptep, pte, max_nr_ptes);
+}
+
 static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
-			   pte_t oldpte, pte_t *pte, int target_node)
+			   pte_t oldpte, pte_t *pte, int target_node,
+			   struct folio **foliop)
 {
-	struct folio *folio;
+	struct folio *folio = NULL;
+	bool ret = true;
 	bool toptier;
 	int nid;
 
 	/* Avoid TLB flush if possible */
 	if (pte_protnone(oldpte))
-		return true;
+		goto skip;
 
 	folio = vm_normal_folio(vma, addr, oldpte);
 	if (!folio)
-		return true;
+		goto skip;
 
 	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
-		return true;
+		goto skip;
 
 	/* Also skip shared copy-on-write pages */
 	if (is_cow_mapping(vma->vm_flags) &&
 	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
-		return true;
+		goto skip;
 
 	/*
 	 * While migration can move some dirty pages,
@@ -112,7 +127,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
 	 * context.
 	 */
 	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
-		return true;
+		goto skip;
 
 	/*
 	 * Don't mess with PTEs if page is already on the node
@@ -120,7 +135,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
 	 */
 	nid = folio_nid(folio);
 	if (target_node == nid)
-		return true;
+		goto skip;
 
 	toptier = node_is_toptier(nid);
 
@@ -129,11 +144,15 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
 	 * balancing is disabled
 	 */
 	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
-		return true;
+		goto skip;
 
+	ret = false;
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
-	return false;
+
+skip:
+	*foliop = folio;
+	return ret;
 }
 
 static long change_pte_range(struct mmu_gather *tlb,
@@ -147,6 +166,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	int nr_ptes;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -161,8 +181,11 @@ static long change_pte_range(struct mmu_gather *tlb,
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
+		nr_ptes = 1;
 		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
+			int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
+			struct folio *folio;
 			pte_t ptent;
 
 			/*
@@ -170,9 +193,15 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * pages. See similar comment in change_huge_pmd.
 			 */
 			if (prot_numa) {
-				if (prot_numa_skip(vma, addr, oldpte, pte,
-						   target_node))
+				int ret = prot_numa_skip(vma, addr, oldpte, pte,
+							 target_node, &folio);
+				if (ret) {
+
+					/* determine batch to skip */
+					nr_ptes = mprotect_folio_pte_batch(folio,
+						  pte, oldpte, max_nr_ptes);
 					continue;
+				}
 			}
 
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
@@ -289,7 +318,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 				pages++;
 			}
 		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-- 
2.30.2



Thread overview: 53+ messages
2025-07-18  9:02 [PATCH v5 0/7] Optimize mprotect() for large folios Dev Jain
2025-07-18  9:02 ` [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function Dev Jain
2025-07-18 16:19   ` Lorenzo Stoakes
2025-07-20 23:44   ` Barry Song
2025-07-21  3:44     ` Dev Jain
2025-07-22 11:05       ` Dev Jain
2025-07-22 11:25   ` Ryan Roberts
2025-07-23 13:57   ` Zi Yan
2025-07-18  9:02 ` Dev Jain [this message]
2025-07-18 16:40   ` [PATCH v5 2/7] mm: Optimize mprotect() for MM_CP_PROT_NUMA by batch-skipping PTEs Lorenzo Stoakes
2025-07-22 11:26   ` Ryan Roberts
2025-07-23 14:25   ` Zi Yan
2025-07-18  9:02 ` [PATCH v5 3/7] mm: Add batched versions of ptep_modify_prot_start/commit Dev Jain
2025-07-18 17:05   ` Lorenzo Stoakes
2025-07-20 23:59   ` Barry Song
2025-07-22 11:35   ` Ryan Roberts
2025-07-23 15:09   ` Zi Yan
2025-07-18  9:02 ` [PATCH v5 4/7] mm: Introduce FPB_RESPECT_WRITE for PTE batching infrastructure Dev Jain
2025-07-18 17:12   ` Lorenzo Stoakes
2025-07-22 11:37   ` Ryan Roberts
2025-07-23 15:28   ` Zi Yan
2025-07-23 15:32     ` Dev Jain
2025-07-18  9:02 ` [PATCH v5 5/7] mm: Split can_change_pte_writable() into private and shared parts Dev Jain
2025-07-18 17:27   ` Lorenzo Stoakes
2025-07-23 15:40   ` Zi Yan
2025-07-18  9:02 ` [PATCH v5 6/7] mm: Optimize mprotect() by PTE batching Dev Jain
2025-07-18 18:49   ` Lorenzo Stoakes
2025-07-19 13:46     ` Dev Jain
2025-07-20 11:20       ` Lorenzo Stoakes
2025-07-20 14:39         ` Dev Jain
2025-07-24 19:55   ` Zi Yan
2025-08-06  8:08   ` David Hildenbrand
2025-08-06  8:12     ` David Hildenbrand
2025-08-06  8:15     ` Will Deacon
2025-08-06  8:19       ` David Hildenbrand
2025-08-06  8:53     ` Dev Jain
2025-08-06  8:56       ` David Hildenbrand
2025-08-06  9:12     ` Lorenzo Stoakes
2025-08-06  9:21       ` David Hildenbrand
2025-08-06  9:37         ` Dev Jain
2025-08-06  9:50           ` Lorenzo Stoakes
     [not found]             ` <1b3d4799-2a57-4f16-973b-82fc7b438862@arm.com>
2025-08-06 10:07               ` Dev Jain
2025-08-06 10:12               ` David Hildenbrand
2025-08-06 10:11             ` David Hildenbrand
2025-08-06 10:20               ` Dev Jain
2025-08-06 10:28                 ` David Hildenbrand
2025-08-06 10:45                 ` Lorenzo Stoakes
2025-08-06 10:45               ` Lorenzo Stoakes
2025-07-18  9:02 ` [PATCH v5 7/7] arm64: Add batched versions of ptep_modify_prot_start/commit Dev Jain
2025-07-18 18:50   ` Lorenzo Stoakes
2025-07-21 15:57   ` Catalin Marinas
2025-07-18  9:50 ` [PATCH v5 0/7] Optimize mprotect() for large folios Dev Jain
2025-07-18 18:53   ` Lorenzo Stoakes
