[PATCH 10/10] mm,sched: conditionally skip lazy TLB mm refcounting

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Rik van Riel <riel@surriel.com>
To: linux-kernel@vger.kernel.org
Cc: kernel-team@fb.com, peterz@infradead.org, luto@kernel.org,
	x86@kernel.org, vkuznets@redhat.com, mingo@kernel.org,
	efault@gmx.de, dave.hansen@intel.com, will.daecon@arm.com,
	catalin.marinas@arm.com, benh@kernel.crashing.org,
	Rik van Riel <riel@surriel.com>
Subject: [PATCH 10/10] mm,sched: conditionally skip lazy TLB mm refcounting
Date: Sat, 28 Jul 2018 17:53:57 -0400	[thread overview]
Message-ID: <20180728215357.3249-11-riel@surriel.com> (raw)
In-Reply-To: <20180728215357.3249-1-riel@surriel.com>

Conditionally skip lazy TLB mm refcounting. When an architecture has
CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING enabled, an mm that is used in
lazy TLB mode anywhere will get shot down from exit_mmap, and there
is no need to incur the cache line bouncing overhead of refcounting
a lazy TLB mm.

Implement this by moving the refcounting of a lazy TLB mm to helper
functions, which skip the refcounting when it is not necessary.

Deal with use_mm and unuse_mm by fully splitting out the refcounting
of the lazy TLB mm a kernel thread may have when entering use_mm from
the refcounting of the mm that use_mm is about to start using.

Signed-off-by: Rik van Riel <riel@surriel.com>
---
 fs/exec.c                |  2 +-
 include/linux/sched/mm.h | 25 +++++++++++++++++++++++++
 kernel/sched/core.c      |  6 +++---
 mm/mmu_context.c         | 21 ++++++++++++++-------
 4 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index bdd0eacefdf5..7a6d4811b02b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1043,7 +1043,7 @@ static int exec_mmap(struct mm_struct *mm)
 		mmput(old_mm);
 		return 0;
 	}
-	mmdrop(active_mm);
+	drop_lazy_mm(active_mm);
 	return 0;
 }
 
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..7308bf38012f 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,31 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
+/*
+ * In lazy TLB mode, a CPU keeps the mm of the last process mapped while
+ * running a kernel thread or idle; we must make sure the lazy TLB mm and
+ * page tables do not disappear while a lazy TLB mode CPU uses them.
+ * There are two ways to handle the race between lazy TLB CPUs and exit_mmap:
+ * 1) Have a lazy TLB CPU hold a refcount on the lazy TLB mm.
+ * 2) Have the architecture code shoot down the lazy TLB mm from exit_mmap;
+ *    in that case, refcounting can be skipped, reducing cache line bouncing.
+ */
+static inline void grab_lazy_mm(struct mm_struct *mm)
+{
+	if (IS_ENABLED(CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING))
+		return;
+
+	mmgrab(mm);
+}
+
+static inline void drop_lazy_mm(struct mm_struct *mm)
+{
+	if (IS_ENABLED(CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING))
+		return;
+
+	mmdrop(mm);
+}
+
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c45de46fdf10..11724c9e88b0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2691,7 +2691,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 */
 	if (mm) {
 		membarrier_mm_sync_core_before_usermode(mm);
-		mmdrop(mm);
+		drop_lazy_mm(mm);
 	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
@@ -2805,7 +2805,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 */
 	if (!mm) {
 		next->active_mm = oldmm;
-		mmgrab(oldmm);
+		grab_lazy_mm(oldmm);
 		enter_lazy_tlb(oldmm, next);
 	} else
 		switch_mm_irqs_off(oldmm, mm, next);
@@ -5532,7 +5532,7 @@ void idle_task_exit(void)
 		current->active_mm = &init_mm;
 		finish_arch_post_lock_switch();
 	}
-	mmdrop(mm);
+	drop_lazy_mm(mm);
 }
 
 /*
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 3e612ae748e9..d5c2524cdd9a 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -24,12 +24,15 @@ void use_mm(struct mm_struct *mm)
 	struct mm_struct *active_mm;
 	struct task_struct *tsk = current;
 
+	/* Kernel threads have a NULL tsk->mm when entering. */
+	WARN_ON(tsk->mm);
+
 	task_lock(tsk);
+	/* Previous ->active_mm was held in lazy TLB mode. */
 	active_mm = tsk->active_mm;
-	if (active_mm != mm) {
-		mmgrab(mm);
-		tsk->active_mm = mm;
-	}
+	/* Grab mm for reals; tsk->mm needs to stick around until unuse_mm. */
+	mmgrab(mm);
+	tsk->active_mm = mm;
 	tsk->mm = mm;
 	switch_mm(active_mm, mm, tsk);
 	task_unlock(tsk);
@@ -37,8 +40,9 @@ void use_mm(struct mm_struct *mm)
 	finish_arch_post_lock_switch();
 #endif
 
-	if (active_mm != mm)
-		mmdrop(active_mm);
+	/* Drop the lazy TLB mode mm. */
+	if (active_mm)
+		drop_lazy_mm(active_mm);
 }
 EXPORT_SYMBOL_GPL(use_mm);
 
@@ -57,8 +61,11 @@ void unuse_mm(struct mm_struct *mm)
 	task_lock(tsk);
 	sync_mm_rss(mm);
 	tsk->mm = NULL;
-	/* active_mm is still 'mm' */
+	/* active_mm is still 'mm'; grab it as a lazy TLB mm */
+	grab_lazy_mm(mm);
 	enter_lazy_tlb(mm, tsk);
+	/* drop the tsk->mm refcount */
+	mmdrop(mm);
 	task_unlock(tsk);
 }
 EXPORT_SYMBOL_GPL(unuse_mm);
-- 
2.14.4

next prev parent reply	other threads:[~2018-07-28 21:54 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-28 21:53 [PATCH 0/10] x86,tlb,mm: more lazy TLB cleanups & optimizations Rik van Riel
2018-07-28 21:53 ` [PATCH 01/10] x86,tlb: clarify memory barrier in switch_mm_irqs_off Rik van Riel
2018-07-29  2:59   ` Andy Lutomirski
2018-07-28 21:53 ` [PATCH 02/10] smp: use __cpumask_set_cpu in on_each_cpu_cond Rik van Riel
2018-07-29  2:59   ` Andy Lutomirski
2018-07-28 21:53 ` [PATCH 03/10] smp,cpumask: introduce on_each_cpu_cond_mask Rik van Riel
2018-07-29  2:57   ` Andy Lutomirski
2018-07-29 12:00     ` Rik van Riel
     [not found]       ` <E710FBA5-CC5E-4941-ACBF-4AB3424F1F68@amacapital.net>
2018-07-29 17:39         ` Rik van Riel
2018-07-29 17:51         ` Rik van Riel
2018-07-29 18:55           ` Andy Lutomirski
2018-07-29 19:56             ` Linus Torvalds
2018-07-28 21:53 ` [PATCH 04/10] x86,mm: use on_each_cpu_cond for TLB flushes Rik van Riel
2018-07-29  2:58   ` Andy Lutomirski
2018-07-29 12:02     ` Rik van Riel
2018-07-28 21:53 ` [PATCH 05/10] mm,tlb: turn dummy defines into inline functions Rik van Riel
2018-07-28 21:53 ` [PATCH 06/10] mm,x86: skip cr4 and ldt reload when mm stays the same Rik van Riel
2018-07-29  4:21   ` Andy Lutomirski
2018-07-28 21:53 ` [PATCH 07/10] x86,mm: remove leave_mm cpu argument Rik van Riel
2018-07-28 21:53 ` [PATCH 08/10] arch,mm: add config variable to skip lazy TLB mm refcounting Rik van Riel
2018-07-28 21:53 ` [PATCH 09/10] mm,x86: shoot down lazy TLB references at exit_mmap time Rik van Riel
2018-07-28 21:53 ` Rik van Riel [this message]
2018-07-29  4:21   ` [PATCH 10/10] mm,sched: conditionally skip lazy TLB mm refcounting Andy Lutomirski
2018-07-29 12:11     ` Rik van Riel
2018-07-29 15:29       ` Andy Lutomirski
2018-07-29 16:55         ` Rik van Riel
2018-07-29 19:54     ` [PATCH v2 10/11] x86,tlb: really leave mm on shootdown Rik van Riel
2018-07-29 19:54     ` [PATCH v2 11/11] mm,sched: conditionally skip lazy TLB mm refcounting Rik van Riel
2018-07-30  9:55       ` Peter Zijlstra
2018-07-30 14:30         ` Rik van Riel
2018-07-30 16:26           ` Peter Zijlstra
2018-07-30 19:15             ` Rik van Riel
2018-07-30 19:30               ` Andy Lutomirski
2018-07-30 19:36                 ` Rik van Riel
2018-07-30 19:49                   ` Andy Lutomirski
2018-07-30 21:46                     ` Rik van Riel
2018-07-30 22:00                       ` Andy Lutomirski
2018-07-31  1:05             ` Rik van Riel
2018-07-31  9:12               ` Peter Zijlstra
2018-07-31 14:29                 ` Andy Lutomirski
2018-07-31 15:03                   ` Rik van Riel
2018-07-31 15:12                     ` Peter Zijlstra
2018-07-30 11:32 ` [PATCH 0/10] x86,tlb,mm: more lazy TLB cleanups & optimizations Ingo Molnar

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:bdd0eacefdf dfblob:7a6d4811b02 dfblob:44d356f5e47
dfblob:7308bf38012 dfblob:c45de46fdf1 dfblob:11724c9e88b
dfblob:3e612ae748e dfblob:d5c2524cdd9 )
 OR (
bs:"[PATCH 10/10] mm,sched: conditionally skip lazy TLB mm refcounting" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180728215357.3249-11-riel@surriel.com \
    --to=riel@surriel.com \
    --cc=benh@kernel.crashing.org \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@intel.com \
    --cc=efault@gmx.de \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=vkuznets@redhat.com \
    --cc=will.daecon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.