From: Nicholas Piggin <npiggin@gmail.com>
To: linux-mm@kvack.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Bibo Mao <maobibo@loongson.cn>
Subject: [PATCH v3 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion
Date: Sun, 20 Dec 2020 14:55:35 +1000
Message-ID: <20201220045535.848591-4-npiggin@gmail.com>
In-Reply-To: <20201220045535.848591-1-npiggin@gmail.com>
Similarly to the previous patch, this optimises the dirty/accessed bits
in newly inserted ptes, to avoid the access cost of having hardware (or
a later fault) set them after the pte is installed.

This tidies up the last few cases where dirty/accessed faults can still
be seen, and subsumes the pte_sw_mkyoung helper -- it is not only
architectures with explicit software dirty/accessed bits that take
expensive faults or updates to modify ptes after insertion.

The vast majority of the remaining dirty/accessed faults on kbuild
workloads after this patch are from NUMA migration, due to
remove_migration_pte inserting old/clean ptes.
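
As a rough illustration of the pattern (a sketch, not part of the diff
below), a fault handler that already knows the access type can construct
the final pte before installing it:

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_mkyoung(entry);	/* the faulting access makes it young */
	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
	set_pte_at(vma->vm_mm, addr, ptep, entry);

so neither hardware nor a software fault has to touch the pte again just
to set the accessed/dirty bits. The 'write', 'addr' and 'ptep' names here
stand in for whatever the particular caller has in hand.
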
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/mips/include/asm/pgtable.h | 2 --
include/linux/pgtable.h | 16 ----------------
mm/huge_memory.c | 4 ++--
mm/memory.c | 14 +++++++-------
mm/migrate.c | 1 +
mm/shmem.c | 1 +
mm/userfaultfd.c | 2 +-
7 files changed, 12 insertions(+), 28 deletions(-)
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4f9c37616d42..3275495adccb 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -406,8 +406,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
-#define pte_sw_mkyoung pte_mkyoung
-
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; }
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8fcdfa52eb4b..70d04931dff4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -424,22 +424,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
}
#endif
-/*
- * On some architectures hardware does not set page access bit when accessing
- * memory page, it is responsibilty of software setting this bit. It brings
- * out extra page fault penalty to track page access bit. For optimization page
- * access bit can be set during all page fault flow on these arches.
- * To be differentiate with macro pte_mkyoung, this macro is used on platforms
- * where software maintains page access bit.
- */
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
- return pte;
-}
-#define pte_sw_mkyoung pte_sw_mkyoung
-#endif
-
#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2ca0326b5af..f6719312dc27 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2151,8 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = maybe_mkwrite(entry, vma);
if (!write)
entry = pte_wrprotect(entry);
- if (!young)
- entry = pte_mkold(entry);
+ if (young)
+ entry = pte_mkyoung(entry);
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)
diff --git a/mm/memory.c b/mm/memory.c
index dd1f364d8ca3..4cebba596660 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1639,7 +1639,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
get_page(page);
inc_mm_counter_fast(mm, mm_counter_file(page));
page_add_file_rmap(page, false);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot)));
return 0;
}
@@ -1954,10 +1954,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
- if (mkwrite) {
- entry = pte_mkyoung(entry);
+ entry = pte_mkyoung(entry);
+ if (mkwrite)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- }
set_pte_at(mm, addr, pte, entry);
update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -2889,7 +2888,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
* Clear the pte entry and flush it first, before updating the
@@ -3402,6 +3401,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
+ pte = pte_mkyoung(pte);
if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3545,7 +3545,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
@@ -3821,7 +3821,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
diff --git a/mm/migrate.c b/mm/migrate.c
index ee5e612b4cd8..d33b2bfc846b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2963,6 +2963,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
}
} else {
entry = mk_pte(page, vma->vm_page_prot);
+ entry = pte_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..4f23b16d6baf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2420,6 +2420,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
goto out_release;
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+ _dst_pte = pte_mkyoung(_dst_pte);
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
else {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..56c44aa06a7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -99,7 +99,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
goto out_release;
- _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+ _dst_pte = pte_mkdirty(pte_mkyoung(mk_pte(page, dst_vma->vm_page_prot)));
if (dst_vma->vm_flags & VM_WRITE) {
if (wp_copy)
_dst_pte = pte_mkuffd_wp(_dst_pte);
--
2.23.0