All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lance Yang <lance.yang@linux.dev>
To: Dev Jain <dev.jain@arm.com>, akpm@linux-foundation.org
Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	catalin.marinas@arm.com, will@kernel.org,
	Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com,
	vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com,
	peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com,
	baohua@kernel.org, kevin.brodsky@arm.com,
	quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu,
	yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org,
	namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com,
	ziy@nvidia.com
Subject: Re: [PATCH v2 0/7] Optimize mprotect for large folios
Date: Tue, 29 Apr 2025 15:06:33 +0800	[thread overview]
Message-ID: <ba73873b-1b26-44cd-ac0f-76e33e8fc2cf@linux.dev> (raw)
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>

Hey Dev,

Hmm... I also hit the same compilation errors:

In file included from ./include/linux/kasan.h:37,
                  from ./include/linux/slab.h:260,
                  from ./include/linux/crypto.h:19,
                  from arch/x86/kernel/asm-offsets.c:9:
./include/linux/pgtable.h: In function ‘modify_prot_start_ptes’:
./include/linux/pgtable.h:905:15: error: implicit declaration of 
function ‘ptep_modify_prot_start’ [-Werror=implicit-function-declaration]
   905 |         pte = ptep_modify_prot_start(vma, addr, ptep);
       |               ^~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h:905:15: error: incompatible types when 
assigning to type ‘pte_t’ from type ‘int’
./include/linux/pgtable.h:909:27: error: incompatible types when 
assigning to type ‘pte_t’ from type ‘int’
   909 |                 tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
       |                           ^~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h: In function ‘modify_prot_commit_ptes’:
./include/linux/pgtable.h:925:17: error: implicit declaration of 
function ‘ptep_modify_prot_commit’ [-Werror=implicit-function-declaration]
   925 |                 ptep_modify_prot_commit(vma, addr, ptep, 
old_pte, pte);
       |                 ^~~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h: At top level:
./include/linux/pgtable.h:1360:21: error: conflicting types for 
‘ptep_modify_prot_start’; have ‘pte_t(struct vm_area_struct *, long 
unsigned int,  pte_t *)’
  1360 | static inline pte_t ptep_modify_prot_start(struct 
vm_area_struct *vma,
       |                     ^~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h:905:15: note: previous implicit declaration of 
‘ptep_modify_prot_start’ with type ‘int()’
   905 |         pte = ptep_modify_prot_start(vma, addr, ptep);
       |               ^~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h:1371:20: warning: conflicting types for 
‘ptep_modify_prot_commit’; have ‘void(struct vm_area_struct *, long 
unsigned int,  pte_t *, pte_t,  pte_t)’
  1371 | static inline void ptep_modify_prot_commit(struct 
vm_area_struct *vma,
       |                    ^~~~~~~~~~~~~~~~~~~~~~~
./include/linux/pgtable.h:1371:20: error: static declaration of 
‘ptep_modify_prot_commit’ follows non-static declaration
./include/linux/pgtable.h:925:17: note: previous implicit declaration of 
‘ptep_modify_prot_commit’ with type ‘void(struct vm_area_struct *, long 
unsigned int,  pte_t *, pte_t,  pte_t)’
   925 |                 ptep_modify_prot_commit(vma, addr, ptep, 
old_pte, pte);
       |                 ^~~~~~~~~~~~~~~~~~~~~~~
   CC 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/libstring.o
   CC 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/libctype.o
   CC 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/str_error_r.o
   CC 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/librbtree.o
cc1: some warnings being treated as errors
make[2]: *** [scripts/Makefile.build:98: arch/x86/kernel/asm-offsets.s] 
Error 1
make[1]: *** 
[/home/runner/work/mm-test-robot/mm-test-robot/linux/Makefile:1280: 
prepare0] Error 2
make[1]: *** Waiting for unfinished jobs....
   LD 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/objtool-in.o
   LINK 
/home/runner/work/mm-test-robot/mm-test-robot/linux/tools/objtool/objtool
make: *** [Makefile:248: __sub-make] Error 2

Well, modify_prot_start_ptes() calls ptep_modify_prot_start(), but x86
does not define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION. To avoid
implicit declaration errors, the architecture-independent
ptep_modify_prot_start() must be defined before modify_prot_start_ptes().

With the changes below, things work correctly now ;)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 10cdb87ccecf..d9d6c49bb914 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -895,44 +895,6 @@ static inline void wrprotect_ptes(struct mm_struct 
*mm, unsigned long addr,
  }
  #endif

-/* See the comment for ptep_modify_prot_start */
-#ifndef modify_prot_start_ptes
-static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
-		unsigned long addr, pte_t *ptep, unsigned int nr)
-{
-	pte_t pte, tmp_pte;
-
-	pte = ptep_modify_prot_start(vma, addr, ptep);
-	while (--nr) {
-		ptep++;
-		addr += PAGE_SIZE;
-		tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
-		if (pte_dirty(tmp_pte))
-			pte = pte_mkdirty(pte);
-		if (pte_young(tmp_pte))
-			pte = pte_mkyoung(pte);
-	}
-	return pte;
-}
-#endif
-
-/* See the comment for ptep_modify_prot_commit */
-#ifndef modify_prot_commit_ptes
-static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, 
unsigned long addr,
-		pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
-{
-	for (;;) {
-		ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
-		if (--nr == 0)
-			break;
-		ptep++;
-		addr += PAGE_SIZE;
-		old_pte = pte_next_pfn(old_pte);
-		pte = pte_next_pfn(pte);
-	}
-}
-#endif
-
  /*
   * On some architectures hardware does not set page access bit when 
accessing
   * memory page, it is responsibility of software setting this bit. It 
brings
@@ -1375,6 +1337,45 @@ static inline void ptep_modify_prot_commit(struct 
vm_area_struct *vma,
  	__ptep_modify_prot_commit(vma, addr, ptep, pte);
  }
  #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
+
+/* See the comment for ptep_modify_prot_start */
+#ifndef modify_prot_start_ptes
+static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+	pte_t pte, tmp_pte;
+
+	pte = ptep_modify_prot_start(vma, addr, ptep);
+	while (--nr) {
+		ptep++;
+		addr += PAGE_SIZE;
+		tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
+		if (pte_dirty(tmp_pte))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp_pte))
+			pte = pte_mkyoung(pte);
+	}
+	return pte;
+}
+#endif
+
+/* See the comment for ptep_modify_prot_commit */
+#ifndef modify_prot_commit_ptes
+static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, 
unsigned long addr,
+		pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+		old_pte = pte_next_pfn(old_pte);
+		pte = pte_next_pfn(pte);
+	}
+}
+#endif
+
  #endif /* CONFIG_MMU */

  /*
--

Thanks,
Lance

On 2025/4/29 13:23, Dev Jain wrote:
> This patchset optimizes the mprotect() system call for large folios
> by PTE-batching.
> 
> We use the following test cases to measure performance, mprotect()'ing
> the mapped memory to read-only then read-write 40 times:
> 
> Test case 1: Mapping 1G of memory, touching it to get PMD-THPs, then
> pte-mapping those THPs
> Test case 2: Mapping 1G of memory with 64K mTHPs
> Test case 3: Mapping 1G of memory with 4K pages
> 
> Average execution time on arm64, Apple M3:
> Before the patchset:
> T1: 7.9 seconds   T2: 7.9 seconds   T3: 4.2 seconds
> 
> After the patchset:
> T1: 2.1 seconds   T2: 2.2 seconds   T3: 4.2 seconds
> 
> Observing T1/T2 and T3 before the patchset, we also remove the regression
> introduced by ptep_get() on a contpte block. And, for large folios we get
> an almost 74% performance improvement.
> 
> v1->v2:
>   - Rebase onto mm-unstable (6ebffe676fcf: util_macros.h: make the header more resilient)
>   - Abridge the anon-exclusive condition (Lance Yang)
> 
> Dev Jain (7):
>    mm: Refactor code in mprotect
>    mm: Optimize mprotect() by batch-skipping PTEs
>    mm: Add batched versions of ptep_modify_prot_start/commit
>    arm64: Add batched version of ptep_modify_prot_start
>    arm64: Add batched version of ptep_modify_prot_commit
>    mm: Batch around can_change_pte_writable()
>    mm: Optimize mprotect() through PTE-batching
> 
>   arch/arm64/include/asm/pgtable.h |  10 ++
>   arch/arm64/mm/mmu.c              |  21 +++-
>   include/linux/mm.h               |   4 +-
>   include/linux/pgtable.h          |  42 ++++++++
>   mm/gup.c                         |   2 +-
>   mm/huge_memory.c                 |   4 +-
>   mm/memory.c                      |   6 +-
>   mm/mprotect.c                    | 165 ++++++++++++++++++++-----------
>   mm/pgtable-generic.c             |  16 ++-
>   9 files changed, 198 insertions(+), 72 deletions(-)
> 



  parent reply	other threads:[~2025-04-29  7:09 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-29  5:23 [PATCH v2 0/7] Optimize mprotect for large folios Dev Jain
2025-04-29  5:23 ` [PATCH v2 1/7] mm: Refactor code in mprotect Dev Jain
2025-04-29  6:41   ` Anshuman Khandual
2025-04-29  6:54     ` Dev Jain
2025-04-29 11:00   ` Lorenzo Stoakes
2025-04-29  5:23 ` [PATCH v2 2/7] mm: Optimize mprotect() by batch-skipping PTEs Dev Jain
2025-04-29  7:14   ` Anshuman Khandual
2025-04-29  8:59     ` Dev Jain
2025-04-29 13:19   ` Lorenzo Stoakes
2025-04-30  6:37     ` Dev Jain
2025-04-30 13:18       ` Ryan Roberts
2025-04-30 13:36         ` Lorenzo Stoakes
2025-04-29  5:23 ` [PATCH v2 3/7] mm: Add batched versions of ptep_modify_prot_start/commit Dev Jain
2025-04-29  8:39   ` Anshuman Khandual
2025-04-29  9:01     ` Dev Jain
2025-04-29 13:52   ` Lorenzo Stoakes
2025-04-30  6:25     ` Dev Jain
2025-04-30 14:37       ` Lorenzo Stoakes
2025-05-06 14:30         ` David Hildenbrand
2025-05-06 15:03           ` Lorenzo Stoakes
2025-04-30 14:09     ` Ryan Roberts
2025-04-30 14:34       ` Lorenzo Stoakes
2025-05-01 11:33         ` Ryan Roberts
2025-05-01 12:58           ` Lorenzo Stoakes
2025-05-06 14:28             ` David Hildenbrand
2025-04-30  5:35   ` kernel test robot
2025-04-30  5:45   ` kernel test robot
2025-04-30 14:16   ` Ryan Roberts
2025-04-29  5:23 ` [PATCH v2 4/7] arm64: Add batched version of ptep_modify_prot_start Dev Jain
2025-04-30  5:43   ` Anshuman Khandual
2025-04-30  5:49     ` Dev Jain
2025-04-30  6:14       ` Anshuman Khandual
2025-04-30  6:32         ` Dev Jain
2025-04-29  5:23 ` [PATCH v2 5/7] arm64: Add batched version of ptep_modify_prot_commit Dev Jain
2025-04-29  5:23 ` [PATCH v2 6/7] mm: Batch around can_change_pte_writable() Dev Jain
2025-04-29  9:15   ` David Hildenbrand
2025-04-29  9:19   ` David Hildenbrand
2025-04-29  9:27     ` David Hildenbrand
2025-04-29 13:57       ` Lorenzo Stoakes
2025-04-29 14:00         ` David Hildenbrand
2025-04-30  5:44         ` Dev Jain
2025-05-06  9:16       ` Dev Jain
2025-05-06 14:34         ` David Hildenbrand
2025-04-30  6:17   ` kernel test robot
2025-04-29  5:23 ` [PATCH v2 7/7] mm: Optimize mprotect() through PTE-batching Dev Jain
2025-04-29  7:06 ` Lance Yang [this message]
2025-04-29  9:02   ` [PATCH v2 0/7] Optimize mprotect for large folios Dev Jain
2025-04-29 10:41     ` Lorenzo Stoakes
2025-04-30  5:42       ` Dev Jain
2025-04-30  6:22         ` Lance Yang
2025-04-30  7:07           ` Dev Jain
2025-04-29 11:03 ` Lorenzo Stoakes
2025-04-29 14:02   ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ba73873b-1b26-44cd-ac0f-76e33e8fc2cf@linux.dev \
    --to=lance.yang@linux.dev \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=baohua@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=david@redhat.com \
    --cc=dev.jain@arm.com \
    --cc=hughd@google.com \
    --cc=ioworker0@gmail.com \
    --cc=jannh@google.com \
    --cc=joey.gouly@arm.com \
    --cc=kevin.brodsky@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=namit@vmware.com \
    --cc=peterx@redhat.com \
    --cc=quic_zhenhuah@quicinc.com \
    --cc=ryan.roberts@arm.com \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=yangyicong@hisilicon.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.