From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
npiggin@gmail.com, kaleshsingh@google.com,
joel@joelfernandes.org,
"Kirill A . Shutemov" <kirill@shutemov.name>,
linuxppc-dev@lists.ozlabs.org
Subject: [PATCH v7 10/11] powerpc/book3s64/mm: Update flush_tlb_range to flush page walk cache
Date: Mon, 7 Jun 2021 11:21:30 +0530 [thread overview]
Message-ID: <20210607055131.156184-11-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20210607055131.156184-1-aneesh.kumar@linux.ibm.com>
flush_tlb_range is special in that we don't specify the page size used
for the translation. Hence when flushing TLB we flush the translation cache
for all possible page sizes. The kernel also uses the same interface when
moving page tables around. Such a move requires us to flush the page walk cache.
Instead of adding another interface to force page walk cache flush,
update flush_tlb_range to flush page walk cache if the range flushed
is more than the PMD range. A page table move will always involve an
invalidate range more than PMD_SIZE.
Running microbenchmark with mprotect and parallel memory access
didn't show any observable performance impact.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
.../include/asm/book3s/64/tlbflush-radix.h | 2 +
arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 8 +++-
arch/powerpc/mm/book3s64/radix_tlb.c | 44 ++++++++++++-------
3 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 8b33601cdb9d..ab9d5e535000 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -60,6 +60,8 @@ extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cb91071eef52..23d3e08911d3 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -32,7 +32,13 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
struct hstate *hstate = hstate_file(vma->vm_file);
psize = hstate_get_psize(hstate);
- radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
+ /*
+ * Flush PWC even if we get PUD_SIZE hugetlb invalidate to keep this simpler.
+ */
+ if (end - start >= PUD_SIZE)
+ radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
+ else
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 817a02ef6032..35c5eb23bfaf 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -997,14 +997,13 @@ static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
-
{
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool fullmm = (end == TLB_FLUSH_ALL);
- bool flush_pid;
+ bool flush_pid, flush_pwc = false;
enum tlb_flush_type type;
pid = mm->context.id;
@@ -1023,8 +1022,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
flush_pid = nr_pages > tlb_single_page_flush_ceiling;
else
flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+ /*
+ * full pid flush already does the PWC flush. if it is not full pid
+ * flush check the range is more than PMD and force a pwc flush
+ * mremap() depends on this behaviour.
+ */
+ if (!flush_pid && (end - start) >= PMD_SIZE)
+ flush_pwc = true;
if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+ unsigned long type = H_RPTI_TYPE_TLB;
unsigned long tgt = H_RPTI_TARGET_CMMU;
unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
@@ -1032,19 +1039,20 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
if (atomic_read(&mm->context.copros) > 0)
tgt |= H_RPTI_TARGET_NMMU;
- pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
- start, end);
+ if (flush_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
} else if (flush_pid) {
+ /*
+ * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL
+ */
if (type == FLUSH_TYPE_LOCAL) {
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
} else {
if (cputlb_use_tlbie()) {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
} else {
- _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
}
} else {
@@ -1060,6 +1068,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (type == FLUSH_TYPE_LOCAL) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
@@ -1067,6 +1078,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
ppc_after_tlbiel_barrier();
} else if (cputlb_use_tlbie()) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
@@ -1074,10 +1087,10 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
- start, end, pid, page_size, mmu_virtual_psize, false);
+ start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
if (hflush)
_tlbiel_va_range_multicast(mm,
- hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
}
}
out:
@@ -1151,9 +1164,6 @@ void radix__flush_all_lpid_guest(unsigned int lpid)
_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize);
-
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
@@ -1260,8 +1270,8 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
{
__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
--
2.31.1
next prev parent reply other threads:[~2021-06-07 5:57 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-07 5:51 [PATCH v7 00/11] Speedup mremap on ppc64 Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 01/11] mm/mremap: Fix race between MOVE_PMD mremap and pageout Aneesh Kumar K.V
2021-06-08 0:06 ` Hugh Dickins
2021-06-08 7:52 ` Aneesh Kumar K.V
2021-06-08 9:42 ` Kirill A. Shutemov
2021-06-08 11:17 ` Aneesh Kumar K.V
2021-06-08 12:05 ` Kirill A. Shutemov
2021-06-08 20:39 ` Hugh Dickins
2021-06-07 5:51 ` [PATCH v7 02/11] mm/mremap: Fix race between MOVE_PUD " Aneesh Kumar K.V
2021-06-14 14:55 ` [mm/mremap] ecf8443e51: vm-scalability.throughput -29.4% regression kernel test robot
2021-06-14 14:58 ` Linus Torvalds
2021-06-14 16:08 ` Aneesh Kumar K.V
2021-06-17 2:38 ` [LKP] " Liu, Yujie
2021-06-07 5:51 ` [PATCH v7 03/11] selftest/mremap_test: Update the test to handle pagesize other than 4K Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 04/11] selftest/mremap_test: Avoid crash with static build Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 05/11] mm/mremap: Convert huge PUD move to separate helper Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 06/11] mm/mremap: Don't enable optimized PUD move if page table levels is 2 Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 07/11] mm/mremap: Use pmd/pud_poplulate to update page table entries Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 08/11] powerpc/mm/book3s64: Fix possible build error Aneesh Kumar K.V
2021-06-07 5:51 ` [PATCH v7 09/11] mm/mremap: Allow arch runtime override Aneesh Kumar K.V
2021-06-07 5:51 ` Aneesh Kumar K.V [this message]
2021-06-07 5:51 ` [PATCH v7 11/11] powerpc/mm: Enable HAVE_MOVE_PMD support Aneesh Kumar K.V
2021-06-07 10:10 ` [PATCH v7 00/11] Speedup mremap on ppc64 Nick Piggin
2021-06-08 4:39 ` Aneesh Kumar K.V
2021-06-08 5:03 ` Nicholas Piggin
2021-06-08 17:10 ` Linus Torvalds
2021-06-16 1:44 ` Nicholas Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210607055131.156184-11-aneesh.kumar@linux.ibm.com \
--to=aneesh.kumar@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=joel@joelfernandes.org \
--cc=kaleshsingh@google.com \
--cc=kirill@shutemov.name \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=npiggin@gmail.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).