All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dev Jain <dev.jain@arm.com>
To: akpm@linux-foundation.org
Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	catalin.marinas@arm.com, will@kernel.org,
	Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com,
	vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com,
	peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com,
	baohua@kernel.org, kevin.brodsky@arm.com,
	quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu,
	yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org,
	namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com,
	ziy@nvidia.com, Dev Jain <dev.jain@arm.com>
Subject: [PATCH 2/7] mm: Optimize mprotect() by batch-skipping PTEs
Date: Mon, 28 Apr 2025 17:34:09 +0530	[thread overview]
Message-ID: <20250428120414.12101-3-dev.jain@arm.com> (raw)
In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com>

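In the prot_numa case, there are several conditions under which we skip the
current PTE and move on to the next iteration. Since these skip conditions
depend on the folio and not on the individual PTEs, when one of them holds we
can skip the whole batch of PTEs mapping that folio instead of a single PTE.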

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/mprotect.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8d635c7fc81f..33eabc995584 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -91,6 +91,9 @@ static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
 	bool toptier;
 	int nid;
 
+	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
+		return true;
+
 	/* Also skip shared copy-on-write pages */
 	if (is_cow_mapping(vma->vm_flags) &&
 	    (folio_maybe_dma_pinned(folio) ||
@@ -126,8 +129,10 @@ static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
 }
 
 static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
-		unsigned long addr, pte_t oldpte, int target_node)
+		unsigned long addr, pte_t *pte, pte_t oldpte, int target_node,
+		int max_nr, int *nr)
 {
+	const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
 	struct folio *folio;
 	int ret;
 
@@ -136,12 +141,16 @@ static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
 		return true;
 
 	folio = vm_normal_folio(vma, addr, oldpte);
-	if (!folio || folio_is_zone_device(folio) ||
-	    folio_test_ksm(folio))
+	if (!folio)
 		return true;
+
 	ret = prot_numa_skip(vma, folio, target_node);
-	if (ret)
+	if (ret) {
+		if (folio_test_large(folio) && max_nr != 1)
+			*nr = folio_pte_batch(folio, addr, pte, oldpte,
+					      max_nr, flags, NULL, NULL, NULL);
 		return ret;
+	}
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio,
 			jiffies_to_msecs(jiffies));
@@ -159,6 +168,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	int nr;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -173,8 +183,10 @@ static long change_pte_range(struct mmu_gather *tlb,
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
+		nr = 1;
 		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
+			int max_nr = (end - addr) >> PAGE_SHIFT;
 			pte_t ptent;
 
 			/*
@@ -182,8 +194,9 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * pages. See similar comment in change_huge_pmd.
 			 */
 			if (prot_numa &&
-			    prot_numa_avoid_fault(vma, addr,
-						  oldpte, target_node))
+			    prot_numa_avoid_fault(vma, addr, pte,
+						  oldpte, target_node,
+							  max_nr, &nr))
 					continue;
 
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
@@ -300,7 +313,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 				pages++;
 			}
 		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += nr, addr += nr * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-- 
2.30.2



  parent reply	other threads:[~2025-04-28 12:34 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-28 12:04 [PATCH 0/7] Optimize mprotect for large folios Dev Jain
2025-04-28 12:04 ` [PATCH 1/7] mm: Refactor code in mprotect Dev Jain
2025-04-28 12:04 ` Dev Jain [this message]
2025-04-28 12:04 ` [PATCH 3/7] mm: Add batched versions of ptep_modify_prot_start/commit Dev Jain
2025-04-28 12:04 ` [PATCH 4/7] arm64: Add batched version of ptep_modify_prot_start Dev Jain
2025-04-28 18:06   ` Zi Yan
2025-04-29  4:44     ` Dev Jain
2025-04-28 12:04 ` [PATCH 5/7] arm64: Add batched version of ptep_modify_prot_commit Dev Jain
2025-04-28 12:04 ` [PATCH 6/7] mm: Batch around can_change_pte_writable() Dev Jain
2025-04-28 12:50   ` Lance Yang
2025-04-28 12:59     ` Dev Jain
2025-04-28 13:23       ` Lance Yang
2025-04-29  4:59         ` Dev Jain
2025-04-28 13:16     ` Lance Yang
2025-04-28 15:54       ` Lance Yang
2025-04-28 12:04 ` [PATCH 7/7] mm: Optimize mprotect() through PTE-batching Dev Jain
2025-04-28 12:52 ` [PATCH 0/7] Optimize mprotect for large folios Dev Jain
2025-04-28 13:31 ` Lance Yang
2025-04-29  4:40   ` Dev Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250428120414.12101-3-dev.jain@arm.com \
    --to=dev.jain@arm.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=baohua@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=ioworker0@gmail.com \
    --cc=jannh@google.com \
    --cc=joey.gouly@arm.com \
    --cc=kevin.brodsky@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=namit@vmware.com \
    --cc=peterx@redhat.com \
    --cc=quic_zhenhuah@quicinc.com \
    --cc=ryan.roberts@arm.com \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=yangyicong@hisilicon.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.