All of lore.kernel.org
 help / color / mirror / Atom feed
* + mm-arm64-override-mkold_clean_ptes-batch-helper.patch added to mm-unstable branch
@ 2024-04-10 21:52 Andrew Morton
  0 siblings, 0 replies; only message in thread
From: Andrew Morton @ 2024-04-10 21:52 UTC (permalink / raw)
  To: mm-commits, zokeefe, xiehuan09, wangkefeng.wang, songmuchun,
	shy828301, ryan.roberts, peterx, minchan, mhocko, fengwei.yin,
	david, 21cnbao, ioworker0, akpm


The patch titled
     Subject: mm/arm64: override mkold_clean_ptes() batch helper
has been added to the -mm mm-unstable branch.  Its filename is
     mm-arm64-override-mkold_clean_ptes-batch-helper.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-arm64-override-mkold_clean_ptes-batch-helper.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Lance Yang <ioworker0@gmail.com>
Subject: mm/arm64: override mkold_clean_ptes() batch helper
Date: Mon, 8 Apr 2024 12:24:37 +0800

The per-pte get_and_clear/modify/set approach results in contpte mappings
being unfolded and then refolded on arm64.  Override mkold_clean_ptes() for
arm64 to avoid that.

Link: https://lkml.kernel.org/r/20240408042437.10951-3-ioworker0@gmail.com
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Barry Song <21cnbao@gmail.com>
Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/arm64/include/asm/pgtable.h |   55 +++++++++++++++++++++++++++++
 arch/arm64/mm/contpte.c          |   15 +++++++
 2 files changed, 70 insertions(+)

--- a/arch/arm64/include/asm/pgtable.h~mm-arm64-override-mkold_clean_ptes-batch-helper
+++ a/arch/arm64/include/asm/pgtable.h
@@ -1223,6 +1223,34 @@ static inline void __wrprotect_ptes(stru
 		__ptep_set_wrprotect(mm, address, ptep);
 }
 
+static inline void ___ptep_mkold_clean(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep, pte_t pte)
+{
+	pte_t old_pte;	/* value expected in *ptep by the cmpxchg */
+
+	do {
+		old_pte = pte;
+		pte = pte_mkclean(pte_mkold(pte));	/* value to install */
+		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+					       pte_val(old_pte), pte_val(pte));
+	} while (pte_val(pte) != pte_val(old_pte));	/* retry on mismatch */
+}
+
+static inline void __ptep_mkold_clean(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	___ptep_mkold_clean(mm, addr, ptep, __ptep_get(ptep));	/* start from the pte as read now */
+}
+
+static inline void __mkold_clean_ptes(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep, unsigned int nr)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr; i++, addr += PAGE_SIZE, ptep++)	/* nr consecutive ptes */
+		__ptep_mkold_clean(mm, addr, ptep);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
@@ -1379,6 +1407,8 @@ extern void contpte_wrprotect_ptes(struc
 extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep,
 				pte_t entry, int dirty);
+extern void contpte_mkold_clean_ptes(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, unsigned int nr);
 
 static __always_inline void contpte_try_fold(struct mm_struct *mm,
 				unsigned long addr, pte_t *ptep, pte_t pte)
@@ -1603,6 +1633,30 @@ static inline int ptep_set_access_flags(
 	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
 }
 
+#define mkold_clean_ptes mkold_clean_ptes
+static inline void mkold_clean_ptes(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, unsigned int nr)
+{
+	if (likely(nr == 1)) {
+		/*
+		 * Optimization: mkold_clean_ptes() can only be called for present
+		 * ptes, so the contig bit alone tells us whether to unfold, and
+		 * clearing it in the pte we already read avoids a re-read. This
+		 * speeds up madvise(MADV_FREE), which is performance-sensitive
+		 * for order-0 folios. Equivalent to contpte_try_unfold().
+		 */
+		pte_t orig_pte = __ptep_get(ptep);
+
+		if (unlikely(pte_cont(orig_pte))) {
+			__contpte_try_unfold(mm, addr, ptep, orig_pte);
+			orig_pte = pte_mknoncont(orig_pte);
+		}
+		___ptep_mkold_clean(mm, addr, ptep, orig_pte);
+	} else {
+		contpte_mkold_clean_ptes(mm, addr, ptep, nr);
+	}
+}
+
 #else /* CONFIG_ARM64_CONTPTE */
 
 #define ptep_get				__ptep_get
@@ -1622,6 +1676,7 @@ static inline int ptep_set_access_flags(
 #define wrprotect_ptes				__wrprotect_ptes
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags			__ptep_set_access_flags
+#define mkold_clean_ptes			__mkold_clean_ptes
 
 #endif /* CONFIG_ARM64_CONTPTE */
 
--- a/arch/arm64/mm/contpte.c~mm-arm64-override-mkold_clean_ptes-batch-helper
+++ a/arch/arm64/mm/contpte.c
@@ -361,6 +361,21 @@ void contpte_wrprotect_ptes(struct mm_st
 }
 EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);
 
+void contpte_mkold_clean_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, unsigned int nr)
+{
+	/*
+	 * If the young and dirty bits are cleared for an entire contig range,
+	 * the range need not be unfolded: just set old/clean and let the later
+	 * mmu_gather flush invalidate the TLB. A contig range that is only
+	 * partially covered must be unfolded first.
+	 */
+
+	contpte_try_unfold_partial(mm, addr, ptep, nr);
+	__mkold_clean_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL_GPL(contpte_mkold_clean_ptes);
+
 int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
 					unsigned long addr, pte_t *ptep,
 					pte_t entry, int dirty)
_

Patches currently in -mm which might be from ioworker0@gmail.com are

mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch
mm-arm64-override-mkold_clean_ptes-batch-helper.patch


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-04-10 21:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-04-10 21:52 + mm-arm64-override-mkold_clean_ptes-batch-helper.patch added to mm-unstable branch Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.