From: Nicholas Piggin <npiggin@gmail.com>
To: linux-arch@vger.kernel.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
x86@kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Arnd Bergmann <arnd@arndb.de>,
Peter Zijlstra <peterz@infradead.org>,
linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
linux-mm@kvack.org, Anton Blanchard <anton@ozlabs.org>
Subject: [RFC PATCH 7/7] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
Date: Fri, 10 Jul 2020 11:56:46 +1000 [thread overview]
Message-ID: <20200710015646.2020871-8-npiggin@gmail.com> (raw)
In-Reply-To: <20200710015646.2020871-1-npiggin@gmail.com>
On big systems, the mm refcount can become highly contented when doing
a lot of context switching with threaded applications (particularly
switching between the idle thread and an application thread).
Abandoning lazy tlb slows switching down quite a bit in the important
user->idle->user cases, so so instead implement a non-refcounted scheme
that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
any remaining lazy ones.
On a 16-socket 192-core POWER8 system, a context switching benchmark
with as many software threads as CPUs (so each switch will go in and
out of idle), upstream can achieve a rate of about 1 million context
switches per second. After this patch it goes up to 118 million.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 16 ++++++++++++++++
arch/powerpc/Kconfig | 1 +
include/linux/sched/mm.h | 6 +++---
kernel/fork.c | 39 +++++++++++++++++++++++++++++++++++++++
4 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 2daf8fe6146a..edf69437a971 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -418,6 +418,22 @@ config MMU_LAZY_TLB
help
Enable "lazy TLB" mmu context switching for kernel threads.
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+ depends on MMU_LAZY_TLB
+ depends on !MMU_LAZY_TLB_SHOOTDOWN
+
+config MMU_LAZY_TLB_SHOOTDOWN
+ bool
+ depends on MMU_LAZY_TLB
+ help
+ Instead of refcounting the "lazy tlb" mm struct, which can cause
+ contention with multi-threaded apps on large multiprocessor systems,
+ this option causes __mmdrop to IPI all CPUs in the mm_cpumask and
+ switch to init_mm if they were using the to-be-freed mm as the lazy
+ tlb. Architectures which do not track all possible lazy tlb CPUs in
+ mm_cpumask can not use this (without modification).
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 920c4e3ca4ef..24ac85c868db 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -225,6 +225,7 @@ config PPC
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_PAGE_SIZE
+ select MMU_LAZY_TLB_SHOOTDOWN
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2c2b20e2ccc7..1067af8039bd 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -53,19 +53,19 @@ void mmdrop(struct mm_struct *mm);
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmdrop(mm);
}
static inline void mmdrop_lazy_tlb_smp_mb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmdrop(mm); /* This depends on mmdrop providing a full smp_mb() */
else
smp_mb();
diff --git a/kernel/fork.c b/kernel/fork.c
index 142b23645d82..da0fba9e6079 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -685,6 +685,40 @@ static void check_mm(struct mm_struct *mm)
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+static void do_shoot_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ BUG_ON(current->mm);
+ switch_mm(mm, &init_mm, current);
+ current->active_mm = &init_mm;
+ }
+}
+
+static void do_check_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ BUG_ON(current->active_mm == mm);
+}
+
+static void shoot_lazy_tlbs(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ smp_call_function_many(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
+ do_shoot_lazy_tlb(mm);
+ }
+}
+
+static void check_lazy_tlbs(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_VM)) {
+ smp_call_function(do_check_lazy_tlb, (void *)mm, 1);
+ do_check_lazy_tlb(mm);
+ }
+}
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
@@ -695,6 +729,11 @@ void __mmdrop(struct mm_struct *mm)
BUG_ON(mm == &init_mm);
WARN_ON_ONCE(mm == current->mm);
WARN_ON_ONCE(mm == current->active_mm);
+
+ /* Ensure no CPUs are using this as their lazy tlb mm */
+ shoot_lazy_tlbs(mm);
+ check_lazy_tlbs(mm);
+
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
--
2.23.0
WARNING: multiple messages have this Message-ID (diff)
From: Nicholas Piggin <npiggin@gmail.com>
To: linux-arch@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>,
Peter Zijlstra <peterz@infradead.org>,
x86@kernel.org, linux-kernel@vger.kernel.org,
Nicholas Piggin <npiggin@gmail.com>,
linux-mm@kvack.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
linuxppc-dev@lists.ozlabs.org
Subject: [RFC PATCH 7/7] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
Date: Fri, 10 Jul 2020 11:56:46 +1000 [thread overview]
Message-ID: <20200710015646.2020871-8-npiggin@gmail.com> (raw)
In-Reply-To: <20200710015646.2020871-1-npiggin@gmail.com>
On big systems, the mm refcount can become highly contented when doing
a lot of context switching with threaded applications (particularly
switching between the idle thread and an application thread).
Abandoning lazy tlb slows switching down quite a bit in the important
user->idle->user cases, so so instead implement a non-refcounted scheme
that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
any remaining lazy ones.
On a 16-socket 192-core POWER8 system, a context switching benchmark
with as many software threads as CPUs (so each switch will go in and
out of idle), upstream can achieve a rate of about 1 million context
switches per second. After this patch it goes up to 118 million.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 16 ++++++++++++++++
arch/powerpc/Kconfig | 1 +
include/linux/sched/mm.h | 6 +++---
kernel/fork.c | 39 +++++++++++++++++++++++++++++++++++++++
4 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 2daf8fe6146a..edf69437a971 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -418,6 +418,22 @@ config MMU_LAZY_TLB
help
Enable "lazy TLB" mmu context switching for kernel threads.
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+ depends on MMU_LAZY_TLB
+ depends on !MMU_LAZY_TLB_SHOOTDOWN
+
+config MMU_LAZY_TLB_SHOOTDOWN
+ bool
+ depends on MMU_LAZY_TLB
+ help
+ Instead of refcounting the "lazy tlb" mm struct, which can cause
+ contention with multi-threaded apps on large multiprocessor systems,
+ this option causes __mmdrop to IPI all CPUs in the mm_cpumask and
+ switch to init_mm if they were using the to-be-freed mm as the lazy
+ tlb. Architectures which do not track all possible lazy tlb CPUs in
+ mm_cpumask can not use this (without modification).
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 920c4e3ca4ef..24ac85c868db 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -225,6 +225,7 @@ config PPC
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_PAGE_SIZE
+ select MMU_LAZY_TLB_SHOOTDOWN
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2c2b20e2ccc7..1067af8039bd 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -53,19 +53,19 @@ void mmdrop(struct mm_struct *mm);
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmdrop(mm);
}
static inline void mmdrop_lazy_tlb_smp_mb(struct mm_struct *mm)
{
- if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
mmdrop(mm); /* This depends on mmdrop providing a full smp_mb() */
else
smp_mb();
diff --git a/kernel/fork.c b/kernel/fork.c
index 142b23645d82..da0fba9e6079 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -685,6 +685,40 @@ static void check_mm(struct mm_struct *mm)
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+static void do_shoot_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ BUG_ON(current->mm);
+ switch_mm(mm, &init_mm, current);
+ current->active_mm = &init_mm;
+ }
+}
+
+static void do_check_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ BUG_ON(current->active_mm == mm);
+}
+
+static void shoot_lazy_tlbs(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ smp_call_function_many(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
+ do_shoot_lazy_tlb(mm);
+ }
+}
+
+static void check_lazy_tlbs(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_VM)) {
+ smp_call_function(do_check_lazy_tlb, (void *)mm, 1);
+ do_check_lazy_tlb(mm);
+ }
+}
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
@@ -695,6 +729,11 @@ void __mmdrop(struct mm_struct *mm)
BUG_ON(mm == &init_mm);
WARN_ON_ONCE(mm == current->mm);
WARN_ON_ONCE(mm == current->active_mm);
+
+ /* Ensure no CPUs are using this as their lazy tlb mm */
+ shoot_lazy_tlbs(mm);
+ check_lazy_tlbs(mm);
+
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
--
2.23.0
next prev parent reply other threads:[~2020-07-10 1:57 UTC|newest]
Thread overview: 118+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-10 1:56 [RFC PATCH 0/7] mmu context cleanup, lazy tlb cleanup, Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 1:56 ` [RFC PATCH 1/7] asm-generic: add generic MMU versions of mmu context functions Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 1:56 ` [RFC PATCH 2/7] arch: use asm-generic mmu context for no-op implementations Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 1:56 ` [RFC PATCH 3/7] mm: introduce exit_lazy_tlb Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 1:56 ` [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 9:42 ` Peter Zijlstra
2020-07-10 9:42 ` Peter Zijlstra
2020-07-10 14:02 ` Mathieu Desnoyers
2020-07-10 14:02 ` Mathieu Desnoyers
2020-07-10 17:04 ` Andy Lutomirski
2020-07-10 17:04 ` Andy Lutomirski
2020-07-13 4:45 ` Nicholas Piggin
2020-07-13 4:45 ` Nicholas Piggin
2020-07-13 13:47 ` Nicholas Piggin
2020-07-13 13:47 ` Nicholas Piggin
2020-07-13 14:13 ` Mathieu Desnoyers
2020-07-13 14:13 ` Mathieu Desnoyers
2020-07-13 15:48 ` Andy Lutomirski
2020-07-13 15:48 ` Andy Lutomirski
2020-07-13 16:37 ` Nicholas Piggin
2020-07-13 16:37 ` Nicholas Piggin
2020-07-16 4:15 ` Nicholas Piggin
2020-07-16 4:15 ` Nicholas Piggin
2020-07-16 4:42 ` Nicholas Piggin
2020-07-16 4:42 ` Nicholas Piggin
2020-07-16 15:46 ` Mathieu Desnoyers
2020-07-16 15:46 ` Mathieu Desnoyers
2020-07-16 16:03 ` Mathieu Desnoyers
2020-07-16 16:03 ` Mathieu Desnoyers
2020-07-16 18:58 ` Mathieu Desnoyers
2020-07-16 18:58 ` Mathieu Desnoyers
2020-07-16 21:24 ` Alan Stern
2020-07-16 21:24 ` Alan Stern
2020-07-17 13:39 ` Mathieu Desnoyers
2020-07-17 13:39 ` Mathieu Desnoyers
2020-07-17 14:51 ` Alan Stern
2020-07-17 14:51 ` Alan Stern
2020-07-17 15:39 ` Mathieu Desnoyers
2020-07-17 15:39 ` Mathieu Desnoyers
2020-07-17 16:11 ` Alan Stern
2020-07-17 16:11 ` Alan Stern
2020-07-17 16:22 ` Mathieu Desnoyers
2020-07-17 16:22 ` Mathieu Desnoyers
2020-07-17 17:44 ` Alan Stern
2020-07-17 17:44 ` Alan Stern
2020-07-17 17:52 ` Mathieu Desnoyers
2020-07-17 17:52 ` Mathieu Desnoyers
2020-07-17 0:00 ` Nicholas Piggin
2020-07-17 0:00 ` Nicholas Piggin
2020-07-16 5:18 ` Andy Lutomirski
2020-07-16 5:18 ` Andy Lutomirski
2020-07-16 6:06 ` Nicholas Piggin
2020-07-16 6:06 ` Nicholas Piggin
2020-07-16 8:50 ` Peter Zijlstra
2020-07-16 8:50 ` Peter Zijlstra
2020-07-16 10:03 ` Nicholas Piggin
2020-07-16 10:03 ` Nicholas Piggin
2020-07-16 11:00 ` peterz
2020-07-16 11:00 ` peterz
2020-07-16 15:34 ` Mathieu Desnoyers
2020-07-16 15:34 ` Mathieu Desnoyers
2020-07-16 23:26 ` Nicholas Piggin
2020-07-16 23:26 ` Nicholas Piggin
2020-07-17 13:42 ` Mathieu Desnoyers
2020-07-17 13:42 ` Mathieu Desnoyers
2020-07-20 3:03 ` Nicholas Piggin
2020-07-20 3:03 ` Nicholas Piggin
2020-07-20 16:46 ` Mathieu Desnoyers
2020-07-20 16:46 ` Mathieu Desnoyers
2020-07-21 10:04 ` Nicholas Piggin
2020-07-21 10:04 ` Nicholas Piggin
2020-07-21 13:11 ` Mathieu Desnoyers
2020-07-21 13:11 ` Mathieu Desnoyers
2020-07-21 14:30 ` Nicholas Piggin
2020-07-21 14:30 ` Nicholas Piggin
2020-07-21 15:06 ` peterz
2020-07-21 15:06 ` peterz
2020-07-21 15:15 ` Mathieu Desnoyers
2020-07-21 15:15 ` Mathieu Desnoyers
2020-07-21 15:19 ` Peter Zijlstra
2020-07-21 15:19 ` Peter Zijlstra
2020-07-21 15:22 ` Mathieu Desnoyers
2020-07-21 15:22 ` Mathieu Desnoyers
2020-07-10 1:56 ` [RFC PATCH 5/7] lazy tlb: introduce lazy mm refcount helper functions Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 9:48 ` Peter Zijlstra
2020-07-10 9:48 ` Peter Zijlstra
2020-07-10 1:56 ` [RFC PATCH 6/7] lazy tlb: allow lazy tlb mm switching to be configurable Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin
2020-07-10 1:56 ` Nicholas Piggin [this message]
2020-07-10 1:56 ` [RFC PATCH 7/7] lazy tlb: shoot lazies, a non-refcounting lazy tlb option Nicholas Piggin
2020-07-10 9:35 ` Peter Zijlstra
2020-07-10 9:35 ` Peter Zijlstra
2020-07-13 4:58 ` Nicholas Piggin
2020-07-13 4:58 ` Nicholas Piggin
2020-07-13 15:59 ` Andy Lutomirski
2020-07-13 15:59 ` Andy Lutomirski
2020-07-13 16:48 ` Nicholas Piggin
2020-07-13 16:48 ` Nicholas Piggin
2020-07-13 18:18 ` Andy Lutomirski
2020-07-13 18:18 ` Andy Lutomirski
2020-07-14 5:04 ` Nicholas Piggin
2020-07-14 5:04 ` Nicholas Piggin
2020-07-14 6:31 ` Nicholas Piggin
2020-07-14 6:31 ` Nicholas Piggin
2020-07-14 12:46 ` Andy Lutomirski
2020-07-14 12:46 ` Andy Lutomirski
2020-07-14 13:23 ` Peter Zijlstra
2020-07-14 13:23 ` Peter Zijlstra
2020-07-16 2:26 ` Nicholas Piggin
2020-07-16 2:26 ` Nicholas Piggin
2020-07-16 2:35 ` Nicholas Piggin
2020-07-16 2:35 ` Nicholas Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200710015646.2020871-8-npiggin@gmail.com \
--to=npiggin@gmail.com \
--cc=anton@ozlabs.org \
--cc=arnd@arndb.de \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=peterz@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.