From: Nicholas Piggin <npiggin@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
x86@kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Arnd Bergmann <arnd@arndb.de>,
Peter Zijlstra <peterz@infradead.org>,
linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
linux-mm@kvack.org, Anton Blanchard <anton@ozlabs.org>
Subject: [PATCH 5/8] lazy tlb: allow lazy tlb mm switching to be configurable
Date: Sun, 29 Nov 2020 02:01:38 +1000 [thread overview]
Message-ID: <20201128160141.1003903-6-npiggin@gmail.com> (raw)
In-Reply-To: <20201128160141.1003903-1-npiggin@gmail.com>
NOMMU systems could easily go without this and save a bit of code
and the refcount atomics, because their mm switch is a no-op. I
haven't flipped them over because haven't audited all arch code to
convert over to using the _lazy_tlb refcounting.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 11 +++++++
include/linux/sched/mm.h | 13 ++++++--
kernel/sched/core.c | 68 +++++++++++++++++++++++++++++-----------
kernel/sched/sched.h | 4 ++-
4 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..596bf589d74b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -430,6 +430,17 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
irqs disabled over activate_mm. Architectures that do IPI based TLB
shootdowns should enable this.
+# Should make this depend on MMU, because there is little use for lazy mm switching
+# with NOMMU. Must audit NOMMU architecture code for lazy mm refcounting first.
+config MMU_LAZY_TLB
+ def_bool y
+ help
+ Enable "lazy TLB" mmu context switching for kernel threads.
+
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+ depends on MMU_LAZY_TLB
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 7157c0f6fef8..bd0f27402d4b 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -51,12 +51,21 @@ static inline void mmdrop(struct mm_struct *mm)
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- mmgrab(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- mmdrop(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment finish_task_switch.
+ */
+ smp_mb();
+ }
}
/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e372b613d514..3b79c6cc3a37 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3579,7 +3579,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
__releases(rq->lock)
{
struct rq *rq = this_rq();
- struct mm_struct *mm = rq->prev_mm;
+ struct mm_struct *mm = NULL;
long prev_state;
/*
@@ -3598,7 +3598,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
current->comm, current->pid, preempt_count()))
preempt_count_set(FORK_PREEMPT_COUNT);
- rq->prev_mm = NULL;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ mm = rq->prev_lazy_mm;
+ rq->prev_lazy_mm = NULL;
+#endif
/*
* A task struct has one reference for the use as "current".
@@ -3630,6 +3633,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* rq->curr, before returning to userspace, for
* {PRIVATE,GLOBAL}_EXPEDITED. This is implicitly provided by
* mmdrop_lazy_tlb().
+ *
+ * This same issue applies to other places that mmdrop_lazy_tlb().
*/
if (mm)
mmdrop_lazy_tlb(mm);
@@ -3719,22 +3724,10 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
calculate_sigpending();
}
-/*
- * context_switch - switch to the new MM and the new thread's register state.
- */
-static __always_inline struct rq *
-context_switch(struct rq *rq, struct task_struct *prev,
- struct task_struct *next, struct rq_flags *rf)
+static __always_inline void
+context_switch_mm(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
{
- prepare_task_switch(rq, prev, next);
-
- /*
- * For paravirt, this is coupled with an exit in switch_to to
- * combine the page table reload and the switch backend into
- * one hypercall.
- */
- arch_start_context_switch(prev);
-
/*
* kernel -> kernel lazy + transfer active
* user -> kernel lazy + mmgrab_lazy_tlb() active
@@ -3765,11 +3758,50 @@ context_switch(struct rq *rq, struct task_struct *prev,
if (!prev->mm) { // from kernel
exit_lazy_tlb(prev->active_mm, next);
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
/* will mmdrop_lazy_tlb() in finish_task_switch(). */
- rq->prev_mm = prev->active_mm;
+ rq->prev_lazy_mm = prev->active_mm;
prev->active_mm = NULL;
+#else
+ /* See membarrier comment in finish_task_switch(). */
+ smp_mb();
+#endif
}
}
+}
+
+static __always_inline void
+context_switch_mm_nolazy(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (!next->mm)
+ next->active_mm = &init_mm;
+ membarrier_switch_mm(rq, prev->active_mm, next->active_mm);
+ switch_mm_irqs_off(prev->active_mm, next->active_mm, next);
+ if (!prev->mm)
+ prev->active_mm = NULL;
+}
+
+/*
+ * context_switch - switch to the new MM and the new thread's register state.
+ */
+static __always_inline struct rq *
+context_switch(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next, struct rq_flags *rf)
+{
+ prepare_task_switch(rq, prev, next);
+
+ /*
+ * For paravirt, this is coupled with an exit in switch_to to
+ * combine the page table reload and the switch backend into
+ * one hypercall.
+ */
+ arch_start_context_switch(prev);
+
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ context_switch_mm(rq, prev, next);
+ else
+ context_switch_mm_nolazy(rq, prev, next);
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df80bfcea92e..3b72aec5a2f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -950,7 +950,9 @@ struct rq {
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
- struct mm_struct *prev_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ struct mm_struct *prev_lazy_mm;
+#endif
unsigned int clock_update_flags;
u64 clock;
--
2.23.0
WARNING: multiple messages have this Message-ID (diff)
From: Nicholas Piggin <npiggin@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org, Arnd Bergmann <arnd@arndb.de>,
Peter Zijlstra <peterz@infradead.org>,
x86@kernel.org, Nicholas Piggin <npiggin@gmail.com>,
linux-mm@kvack.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 5/8] lazy tlb: allow lazy tlb mm switching to be configurable
Date: Sun, 29 Nov 2020 02:01:38 +1000 [thread overview]
Message-ID: <20201128160141.1003903-6-npiggin@gmail.com> (raw)
In-Reply-To: <20201128160141.1003903-1-npiggin@gmail.com>
NOMMU systems could easily go without this and save a bit of code
and the refcount atomics, because their mm switch is a no-op. I
haven't flipped them over because haven't audited all arch code to
convert over to using the _lazy_tlb refcounting.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/Kconfig | 11 +++++++
include/linux/sched/mm.h | 13 ++++++--
kernel/sched/core.c | 68 +++++++++++++++++++++++++++++-----------
kernel/sched/sched.h | 4 ++-
4 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..596bf589d74b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -430,6 +430,17 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
irqs disabled over activate_mm. Architectures that do IPI based TLB
shootdowns should enable this.
+# Should make this depend on MMU, because there is little use for lazy mm switching
+# with NOMMU. Must audit NOMMU architecture code for lazy mm refcounting first.
+config MMU_LAZY_TLB
+ def_bool y
+ help
+ Enable "lazy TLB" mmu context switching for kernel threads.
+
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+ depends on MMU_LAZY_TLB
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 7157c0f6fef8..bd0f27402d4b 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -51,12 +51,21 @@ static inline void mmdrop(struct mm_struct *mm)
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- mmgrab(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- mmdrop(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment finish_task_switch.
+ */
+ smp_mb();
+ }
}
/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e372b613d514..3b79c6cc3a37 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3579,7 +3579,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
__releases(rq->lock)
{
struct rq *rq = this_rq();
- struct mm_struct *mm = rq->prev_mm;
+ struct mm_struct *mm = NULL;
long prev_state;
/*
@@ -3598,7 +3598,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
current->comm, current->pid, preempt_count()))
preempt_count_set(FORK_PREEMPT_COUNT);
- rq->prev_mm = NULL;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ mm = rq->prev_lazy_mm;
+ rq->prev_lazy_mm = NULL;
+#endif
/*
* A task struct has one reference for the use as "current".
@@ -3630,6 +3633,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* rq->curr, before returning to userspace, for
* {PRIVATE,GLOBAL}_EXPEDITED. This is implicitly provided by
* mmdrop_lazy_tlb().
+ *
+ * This same issue applies to other places that mmdrop_lazy_tlb().
*/
if (mm)
mmdrop_lazy_tlb(mm);
@@ -3719,22 +3724,10 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
calculate_sigpending();
}
-/*
- * context_switch - switch to the new MM and the new thread's register state.
- */
-static __always_inline struct rq *
-context_switch(struct rq *rq, struct task_struct *prev,
- struct task_struct *next, struct rq_flags *rf)
+static __always_inline void
+context_switch_mm(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
{
- prepare_task_switch(rq, prev, next);
-
- /*
- * For paravirt, this is coupled with an exit in switch_to to
- * combine the page table reload and the switch backend into
- * one hypercall.
- */
- arch_start_context_switch(prev);
-
/*
* kernel -> kernel lazy + transfer active
* user -> kernel lazy + mmgrab_lazy_tlb() active
@@ -3765,11 +3758,50 @@ context_switch(struct rq *rq, struct task_struct *prev,
if (!prev->mm) { // from kernel
exit_lazy_tlb(prev->active_mm, next);
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
/* will mmdrop_lazy_tlb() in finish_task_switch(). */
- rq->prev_mm = prev->active_mm;
+ rq->prev_lazy_mm = prev->active_mm;
prev->active_mm = NULL;
+#else
+ /* See membarrier comment in finish_task_switch(). */
+ smp_mb();
+#endif
}
}
+}
+
+static __always_inline void
+context_switch_mm_nolazy(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (!next->mm)
+ next->active_mm = &init_mm;
+ membarrier_switch_mm(rq, prev->active_mm, next->active_mm);
+ switch_mm_irqs_off(prev->active_mm, next->active_mm, next);
+ if (!prev->mm)
+ prev->active_mm = NULL;
+}
+
+/*
+ * context_switch - switch to the new MM and the new thread's register state.
+ */
+static __always_inline struct rq *
+context_switch(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next, struct rq_flags *rf)
+{
+ prepare_task_switch(rq, prev, next);
+
+ /*
+ * For paravirt, this is coupled with an exit in switch_to to
+ * combine the page table reload and the switch backend into
+ * one hypercall.
+ */
+ arch_start_context_switch(prev);
+
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB))
+ context_switch_mm(rq, prev, next);
+ else
+ context_switch_mm_nolazy(rq, prev, next);
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df80bfcea92e..3b72aec5a2f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -950,7 +950,9 @@ struct rq {
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
- struct mm_struct *prev_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ struct mm_struct *prev_lazy_mm;
+#endif
unsigned int clock_update_flags;
u64 clock;
--
2.23.0
next prev parent reply other threads:[~2020-11-28 22:07 UTC|newest]
Thread overview: 92+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-28 16:01 [PATCH 0/8] shoot lazy tlbs Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-28 16:01 ` [PATCH 1/8] lazy tlb: introduce exit_lazy_tlb Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-29 0:38 ` Andy Lutomirski
2020-11-29 0:38 ` Andy Lutomirski
2020-12-02 2:49 ` Nicholas Piggin
2020-12-02 2:49 ` Nicholas Piggin
2020-11-28 16:01 ` [PATCH 2/8] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-28 17:55 ` Andy Lutomirski
2020-11-28 17:55 ` Andy Lutomirski
2020-12-02 2:49 ` Nicholas Piggin
2020-12-02 2:49 ` Nicholas Piggin
2020-12-03 5:09 ` Andy Lutomirski
2020-12-03 5:09 ` Andy Lutomirski
2020-12-05 8:00 ` Nicholas Piggin
2020-12-05 8:00 ` Nicholas Piggin
2020-12-05 16:11 ` Andy Lutomirski
2020-12-05 16:11 ` Andy Lutomirski
2020-12-05 23:14 ` Nicholas Piggin
2020-12-05 23:14 ` Nicholas Piggin
2020-12-06 0:36 ` Andy Lutomirski
2020-12-06 0:36 ` Andy Lutomirski
2020-12-06 3:59 ` Nicholas Piggin
2020-12-06 3:59 ` Nicholas Piggin
2020-12-11 0:11 ` Andy Lutomirski
2020-12-11 0:11 ` Andy Lutomirski
2020-12-14 4:07 ` Nicholas Piggin
2020-12-14 4:07 ` Nicholas Piggin
2020-12-14 5:53 ` Nicholas Piggin
2020-12-14 5:53 ` Nicholas Piggin
2020-11-30 14:57 ` Mathieu Desnoyers
2020-11-30 14:57 ` Mathieu Desnoyers
2020-11-28 16:01 ` [PATCH 3/8] x86: remove ARCH_HAS_SYNC_CORE_BEFORE_USERMODE Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-28 16:01 ` [PATCH 4/8] lazy tlb: introduce lazy mm refcount helper functions Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin [this message]
2020-11-28 16:01 ` [PATCH 5/8] lazy tlb: allow lazy tlb mm switching to be configurable Nicholas Piggin
2020-11-29 0:36 ` Andy Lutomirski
2020-11-29 0:36 ` Andy Lutomirski
2020-12-02 2:49 ` Nicholas Piggin
2020-12-02 2:49 ` Nicholas Piggin
2020-11-28 16:01 ` [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-29 3:54 ` Andy Lutomirski
2020-11-29 3:54 ` Andy Lutomirski
2020-11-29 20:16 ` Andy Lutomirski
2020-11-29 20:16 ` Andy Lutomirski
2020-11-30 9:25 ` Peter Zijlstra
2020-11-30 9:25 ` Peter Zijlstra
2020-11-30 18:31 ` Andy Lutomirski
2020-11-30 18:31 ` Andy Lutomirski
2020-12-01 21:27 ` Will Deacon
2020-12-01 21:27 ` Will Deacon
2020-12-01 21:50 ` Andy Lutomirski
2020-12-01 21:50 ` Andy Lutomirski
2020-12-01 23:04 ` Will Deacon
2020-12-01 23:04 ` Will Deacon
2020-12-02 3:47 ` Nicholas Piggin
2020-12-02 3:47 ` Nicholas Piggin
2020-12-03 5:05 ` Andy Lutomirski
2020-12-03 5:05 ` Andy Lutomirski
2020-12-03 17:03 ` Alexander Gordeev
2020-12-03 17:03 ` Alexander Gordeev
2020-12-03 17:14 ` Andy Lutomirski
2020-12-03 17:14 ` Andy Lutomirski
2020-12-03 18:33 ` Alexander Gordeev
2020-12-03 18:33 ` Alexander Gordeev
2020-11-30 9:26 ` Peter Zijlstra
2020-11-30 9:26 ` Peter Zijlstra
2020-11-30 9:30 ` Peter Zijlstra
2020-11-30 9:30 ` Peter Zijlstra
2020-11-30 9:34 ` Peter Zijlstra
2020-11-30 9:34 ` Peter Zijlstra
2020-12-02 3:09 ` Nicholas Piggin
2020-12-02 3:09 ` Nicholas Piggin
2020-12-02 11:17 ` Peter Zijlstra
2020-12-02 11:17 ` Peter Zijlstra
2020-12-02 12:45 ` Peter Zijlstra
2020-12-02 12:45 ` Peter Zijlstra
2020-12-02 14:19 ` Peter Zijlstra
2020-12-02 14:19 ` Peter Zijlstra
2020-12-02 14:38 ` Andy Lutomirski
2020-12-02 14:38 ` Andy Lutomirski
2020-12-02 16:29 ` Peter Zijlstra
2020-12-02 16:29 ` Peter Zijlstra
2020-11-28 16:01 ` [PATCH 7/8] powerpc: use lazy mm refcount helper functions Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
2020-11-28 16:01 ` [PATCH 8/8] powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN Nicholas Piggin
2020-11-28 16:01 ` Nicholas Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201128160141.1003903-6-npiggin@gmail.com \
--to=npiggin@gmail.com \
--cc=anton@ozlabs.org \
--cc=arnd@arndb.de \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=peterz@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.