From: Nicholas Piggin <npiggin@gmail.com>
To: linux-mm@kvack.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Bibo Mao <maobibo@loongson.cn>
Subject: [PATCH v3 3/3] mm: optimise pte dirty/accessed bit setting by demand based pte insertion
Date: Sun, 20 Dec 2020 14:55:35 +1000
Message-ID: <20201220045535.848591-4-npiggin@gmail.com>
In-Reply-To: <20201220045535.848591-1-npiggin@gmail.com>
Similarly to the previous patch, this optimises the dirty/accessed bits
in newly inserted ptes, to avoid the access cost of having hardware (or
a later fault) set them after the pte is installed.

This tidies up the last few cases where dirty/accessed faults can still
be seen, and subsumes the pte_sw_mkyoung helper -- it is not only
architectures with explicit software dirty/accessed bits that take
expensive faults or updates to modify ptes after insertion.

The vast majority of the remaining dirty/accessed faults on kbuild
workloads after this patch are from NUMA migration, due to
remove_migration_pte inserting old/clean ptes.
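
As a rough illustration of the pattern (a sketch, not part of the diff
below), a fault handler that already knows the access type can construct
the final pte before installing it:

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_mkyoung(entry);	/* the faulting access makes it young */
	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
	set_pte_at(vma->vm_mm, addr, ptep, entry);

so neither hardware nor a software fault has to touch the pte again just
to set the accessed/dirty bits. The 'write', 'addr' and 'ptep' names here
stand in for whatever the particular caller has in hand.
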
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/mips/include/asm/pgtable.h | 2 --
include/linux/pgtable.h | 16 ----------------
mm/huge_memory.c | 4 ++--
mm/memory.c | 14 +++++++-------
mm/migrate.c | 1 +
mm/shmem.c | 1 +
mm/userfaultfd.c | 2 +-
7 files changed, 12 insertions(+), 28 deletions(-)
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4f9c37616d42..3275495adccb 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -406,8 +406,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
-#define pte_sw_mkyoung pte_mkyoung
-
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; }
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8fcdfa52eb4b..70d04931dff4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -424,22 +424,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
}
#endif
-/*
- * On some architectures hardware does not set page access bit when accessing
- * memory page, it is responsibilty of software setting this bit. It brings
- * out extra page fault penalty to track page access bit. For optimization page
- * access bit can be set during all page fault flow on these arches.
- * To be differentiate with macro pte_mkyoung, this macro is used on platforms
- * where software maintains page access bit.
- */
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
- return pte;
-}
-#define pte_sw_mkyoung pte_sw_mkyoung
-#endif
-
#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2ca0326b5af..f6719312dc27 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2151,8 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = maybe_mkwrite(entry, vma);
if (!write)
entry = pte_wrprotect(entry);
- if (!young)
- entry = pte_mkold(entry);
+ if (young)
+ entry = pte_mkyoung(entry);
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)
diff --git a/mm/memory.c b/mm/memory.c
index dd1f364d8ca3..4cebba596660 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1639,7 +1639,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
get_page(page);
inc_mm_counter_fast(mm, mm_counter_file(page));
page_add_file_rmap(page, false);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ set_pte_at(mm, addr, pte, pte_mkyoung(mk_pte(page, prot)));
return 0;
}
@@ -1954,10 +1954,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
- if (mkwrite) {
- entry = pte_mkyoung(entry);
+ entry = pte_mkyoung(entry);
+ if (mkwrite)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- }
set_pte_at(mm, addr, pte, entry);
update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -2889,7 +2888,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
* Clear the pte entry and flush it first, before updating the
@@ -3402,6 +3401,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
+ pte = pte_mkyoung(pte);
if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3545,7 +3545,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
@@ -3821,7 +3821,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+ entry = pte_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
diff --git a/mm/migrate.c b/mm/migrate.c
index ee5e612b4cd8..d33b2bfc846b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2963,6 +2963,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
}
} else {
entry = mk_pte(page, vma->vm_page_prot);
+ entry = pte_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..4f23b16d6baf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2420,6 +2420,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
goto out_release;
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+ _dst_pte = pte_mkyoung(_dst_pte);
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
else {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..56c44aa06a7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -99,7 +99,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
goto out_release;
- _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+ _dst_pte = pte_mkdirty(pte_mkyoung(mk_pte(page, dst_vma->vm_page_prot)));
if (dst_vma->vm_flags & VM_WRITE) {
if (wp_copy)
_dst_pte = pte_mkuffd_wp(_dst_pte);
--
2.23.0