From: Baoquan He <bhe@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: "Russell King (Oracle)" <linux@armlinux.org.uk>,
Andrew Morton <akpm@linux-foundation.org>,
linux-mm@kvack.org, Christoph Hellwig <hch@lst.de>,
Uladzislau Rezki <urezki@gmail.com>,
Lorenzo Stoakes <lstoakes@gmail.com>,
Peter Zijlstra <peterz@infradead.org>,
John Ogness <jogness@linutronix.de>,
linux-arm-kernel@lists.infradead.org,
Mark Rutland <mark.rutland@arm.com>,
Marc Zyngier <maz@kernel.org>,
x86@kernel.org
Subject: [RFC PATCH 1/3] mm/vmalloc.c: try to flush vmap_area one by one
Date: Fri, 19 May 2023 20:01:27 +0800
Message-ID: <ZGdlF0nuLoSI7FPX@MiWiFi-R3L-srv>
In-Reply-To: <87edng6qu8.ffs@tglx>

In the current __purge_vmap_area_lazy(), when flushing the TLB of the
vmalloc area, the flush range is calculated as the [min:max] span of all
vas. That range can be much bigger than what is actually mapped because
of the gaps between the vas. E.g. in the diagram below, only 12 pages
(4 from VA_1, 8 from VA_2) are mapped, while the calculated flush range
covers 58 pages.

     VA_1                             VA_2
  |....|---------------------------|........|
  10   14                          60       68

  . mapped
  - not mapped (gap between the vas)

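The difference is easy to check with a quick calculation. Below is a
minimal stand-alone sketch (plain user-space C, only to illustrate the
arithmetic, not kernel code; the values just mirror the diagram above):

	#include <stdio.h>

	/* page-granular ranges matching VA_1 and VA_2 in the diagram */
	struct range { unsigned long start, end; };

	int main(void)
	{
		struct range vas[] = { { 10, 14 }, { 60, 68 } };
		unsigned long mapped = 0, min = -1UL, max = 0;
		int i;

		for (i = 0; i < 2; i++) {
			mapped += vas[i].end - vas[i].start;
			if (vas[i].start < min)
				min = vas[i].start;
			if (vas[i].end > max)
				max = vas[i].end;
		}
		/* prints: mapped = 12 pages, [min:max] flush = 58 pages */
		printf("mapped = %lu pages, [min:max] flush = %lu pages\n",
		       mapped, max - min);
		return 0;
	}
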
Sometimes the calculated flush range can be surprisingly huge because
the vas may sit in two kernel virtual address areas which are very far
apart, e.g. the vmalloc area and the kernel module area on some
architectures. So on systems which lack a full TLB flush, flushing such
a long range is a real problem (it takes a long time). Flushing the vas
one by one becomes necessary in that case.

Hence, introduce flush_tlb_kernel_vas() to flush the vas one by one,
and add ARCH_HAS_FLUSH_TLB_KERNEL_VAS so that an architecture can
indicate that it provides a flush_tlb_kernel_vas() implementation.
Otherwise, keep the old way of calculating and flushing the whole
range.
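
In sketch form, an architecture implementation is expected to walk the
purge list and flush each area's pages individually; num_entries lets it
fall back to a full flush when per-page flushing would be slower. A
simplified sketch follows (arch_flush_one_kernel_page() is only a
placeholder for the architecture's per-page kernel TLB flush primitive,
not an existing helper; see the arm and x86 hunks below for the real
implementations):

	void flush_tlb_kernel_vas(struct list_head *vmap_list,
				  unsigned long num_entries)
	{
		struct vmap_area *va;
		unsigned long addr;

		list_for_each_entry(va, vmap_list, list)
			for (addr = va->va_start; addr < va->va_end; addr += PAGE_SIZE)
				arch_flush_one_kernel_page(addr);
	}
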
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Baoquan He <bhe@redhat.com> #Fix error of 'undefined reference to `flush_tlb_kernel_vas''
---
arch/Kconfig | 4 ++++
arch/arm/Kconfig | 1 +
arch/arm/kernel/smp_tlb.c | 23 +++++++++++++++++++++++
arch/x86/Kconfig | 1 +
arch/x86/mm/tlb.c | 22 ++++++++++++++++++++++
include/linux/vmalloc.h | 8 ++++++++
mm/vmalloc.c | 32 ++++++++++++++++++++++----------
7 files changed, 81 insertions(+), 10 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 205fd23e0cad..ca5413f1e4e0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -270,6 +270,10 @@ config ARCH_HAS_SET_MEMORY
config ARCH_HAS_SET_DIRECT_MAP
bool
+# Select if the architecture provides flush_tlb_kernel_vas()
+config ARCH_HAS_FLUSH_TLB_KERNEL_VAS
+ bool
+
#
# Select if the architecture provides the arch_dma_set_uncached symbol to
# either provide an uncached segment alias for a DMA allocation, or
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0fb4b218f665..c4de7f38f9a7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -10,6 +10,7 @@ config ARM
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_FLUSH_TLB_KERNEL_VAS
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index d4908b3736d8..22ec9b982cb1 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -7,6 +7,7 @@
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#include <asm/smp_plat.h>
#include <asm/tlbflush.h>
@@ -69,6 +70,19 @@ static inline void ipi_flush_tlb_kernel_range(void *arg)
local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}
+static inline void local_flush_tlb_kernel_vas(struct list_head *vmap_list)
+{
+	struct vmap_area *va;
+
+	list_for_each_entry(va, vmap_list, list)
+		local_flush_tlb_kernel_range(va->va_start, va->va_end);
+}
+
+static inline void ipi_flush_tlb_kernel_vas(void *arg)
+{
+	local_flush_tlb_kernel_vas(arg);
+}
+
static inline void ipi_flush_bp_all(void *ignored)
{
local_flush_bp_all();
@@ -244,6 +258,15 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
broadcast_tlb_a15_erratum();
}
+void flush_tlb_kernel_vas(struct list_head *vmap_list, unsigned long num_entries)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_tlb_kernel_vas, vmap_list, 1);
+	else
+		local_flush_tlb_kernel_vas(vmap_list);
+	broadcast_tlb_a15_erratum();
+}
+
void flush_bp_all(void)
{
if (tlb_ops_need_broadcast())
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee..7d7a44810a0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -77,6 +77,7 @@ config X86
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FLUSH_TLB_KERNEL_VAS
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 267acf27480a..c39d77eb37e4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -10,6 +10,7 @@
#include <linux/debugfs.h>
#include <linux/sched/smt.h>
#include <linux/task_work.h>
+#include <linux/vmalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -1081,6 +1082,27 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
}
+static void do_flush_tlb_vas(void *arg)
+{
+	struct list_head *vmap_list = arg;
+	struct vmap_area *va;
+	unsigned long addr;
+
+	list_for_each_entry(va, vmap_list, list) {
+		/* flush the range page by page with 'invlpg' */
+		for (addr = va->va_start; addr < va->va_end; addr += PAGE_SIZE)
+			flush_tlb_one_kernel(addr);
+	}
+}
+
+void flush_tlb_kernel_vas(struct list_head *vmap_list, unsigned long num_entries)
+{
+	if (num_entries > tlb_single_page_flush_ceiling)
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+	else
+		on_each_cpu(do_flush_tlb_vas, vmap_list, 1);
+}
+
/*
* This can be used from process context to figure out what the value of
* CR3 is without needing to do a (slow) __read_cr3().
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c720be70c8dd..a9a1e488261d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -295,4 +295,12 @@ bool vmalloc_dump_obj(void *object);
static inline bool vmalloc_dump_obj(void *object) { return false; }
#endif
+#if defined(CONFIG_ARCH_HAS_FLUSH_TLB_KERNEL_VAS)
+void flush_tlb_kernel_vas(struct list_head *list, unsigned long num_entries);
+#else
+static inline void flush_tlb_kernel_vas(struct list_head *list, unsigned long num_entries)
+{
+}
+#endif
+
#endif /* _LINUX_VMALLOC_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c0f80982eb06..31e8d9e93650 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1724,7 +1724,8 @@ static void purge_fragmented_blocks_allcpus(void);
*/
static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
{
- unsigned long resched_threshold;
+ unsigned long resched_threshold, num_entries = 0, num_alias_entries = 0;
+ struct vmap_area alias_va = { .va_start = start, .va_end = end };
unsigned int num_purged_areas = 0;
struct list_head local_purge_list;
struct vmap_area *va, *n_va;
@@ -1736,18 +1737,29 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
list_replace_init(&purge_vmap_area_list, &local_purge_list);
spin_unlock(&purge_vmap_area_lock);
-	if (unlikely(list_empty(&local_purge_list)))
-		goto out;
+	start = min(start, list_first_entry(&local_purge_list, struct vmap_area, list)->va_start);
+	end = max(end, list_last_entry(&local_purge_list, struct vmap_area, list)->va_end);
+
+	if (IS_ENABLED(CONFIG_ARCH_HAS_FLUSH_TLB_KERNEL_VAS)) {
+		list_for_each_entry(va, &local_purge_list, list)
+			num_entries += (va->va_end - va->va_start) >> PAGE_SHIFT;
+
+		if (unlikely(!num_entries))
+			goto out;
+
+		if (alias_va.va_end > alias_va.va_start) {
+			num_alias_entries = (alias_va.va_end - alias_va.va_start) >> PAGE_SHIFT;
+			list_add(&alias_va.list, &local_purge_list);
+		}
-	start = min(start,
-		list_first_entry(&local_purge_list,
-			struct vmap_area, list)->va_start);
+		flush_tlb_kernel_vas(&local_purge_list, num_entries + num_alias_entries);
-	end = max(end,
-		list_last_entry(&local_purge_list,
-			struct vmap_area, list)->va_end);
+		if (num_alias_entries)
+			list_del(&alias_va.list);
+	} else {
+		flush_tlb_kernel_range(start, end);
+	}
-	flush_tlb_kernel_range(start, end);
resched_threshold = lazy_max_pages() << 1;
spin_lock(&free_vmap_area_lock);
--
2.34.1