linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linuxfoundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Paul McKenney <paulmck@kernel.org>,
	Christoph Hellwig <hch@lst.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, Alexander Viro <viro@zeniv.linux.org.uk>,
	Benjamin LaHaise <bcrl@kvack.org>,
	linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>,
	linux-btrfs@vger.kernel.org, x86@kernel.org,
	Vineet Gupta <vgupta@synopsys.com>,
	linux-snps-arc@lists.infradead.org,
	Russell King <linux@armlinux.org.uk>,
	Arnd Bergmann <arnd@arndb.de>,
	linux-arm-kernel@lists.infradead.org, linux-csky@vger.kernel.org,
	Michal Simek <monstr@monstr.eu>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	linux-mips@vger.kernel.org, Nick Hu <nickhu@andestech.com>,
	Greentime Hu <green.hu@gmail.com>,
	Vincent Chen <deanbo422@gmail.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	linuxppc-dev@lists.ozlabs.org,
	"David S. Miller" <davem@davemloft.net>,
	sparclinux@vger.kernel.org, Chris Zankel <chris@zankel.net>,
	Max Filippov <jcmvbkbc@gmail.com>,
	linux-xtensa@linux-xtensa.org, Ingo Molnar <mingo@kernel.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Christian Koenig <christian.koenig@amd.com>,
	Huang Rui <ray.huang@amd.com>, David Airlie <airlied@linux.ie>,
	Daniel Vetter <daniel@ffwll.ch>,
	dri-devel@lists.freedesktop.org,
	VMware Graphics <linux-graphics-maintainer@vmware.com>,
	Roland Scheidegger <sroland@vmware.com>,
	Dave Airlie <airlied@redhat.com>,
	Gerd Hoffmann <kraxel@redhat.com>,
	virtualization@lists.linux-foundation.org,
	spice-devel@lists.freedesktop.org,
	Ben Skeggs <bskeggs@redhat.com>,
	nouveau@lists.freedesktop.org,
	Jani Nikula <jani.nikula@linux.intel.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	intel-gfx@lists.freedesktop.org
Subject: [patch V4 24/37] sched: highmem: Store local kmaps in task struct
Date: Tue, 03 Nov 2020 14:51:24 +0100	[thread overview]
Message-ID: <877dr235wj.fsf@nanos.tec.linutronix.de> (raw)
In-Reply-To: <20201103095859.038791330@linutronix.de>

Instead of storing the map per CPU, provide and use per-task storage. That
prepares for local kmaps which are preemptible.

The context switch code is preparatory and not yet in use because
kmap_atomic() runs with preemption disabled. It will be made usable in the
next step.

The context switch logic is safe even when an interrupt happens after
clearing or before restoring the kmaps. The kmap index in the task struct
is not modified, so any nested kmap in an interrupt will use unused indices,
and on return the counter is the same as before.

Also add an assert to the return-to-user-space code. Going back to user
space with an active kmap local is a no-no.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V4: Use the version which actually compiles and works
V3: Handle the debug case correctly
---
 include/linux/highmem-internal.h |   10 +++
 include/linux/sched.h            |    9 +++
 kernel/entry/common.c            |    2 
 kernel/fork.c                    |    1 
 kernel/sched/core.c              |   18 +++++++
 mm/highmem.c                     |   99 +++++++++++++++++++++++++++++++++++----
 6 files changed, 129 insertions(+), 10 deletions(-)

--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -9,6 +9,16 @@
 void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
 void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
 void kunmap_local_indexed(void *vaddr);
+void kmap_local_fork(struct task_struct *tsk);
+void __kmap_local_sched_out(void);
+void __kmap_local_sched_in(void);
+static inline void kmap_assert_nomap(void)
+{
+	DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
+}
+#else
+static inline void kmap_local_fork(struct task_struct *tsk) { }
+static inline void kmap_assert_nomap(void) { }
 #endif
 
 #ifdef CONFIG_HIGHMEM
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
 #include <linux/rseq.h>
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
+#include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -629,6 +630,13 @@ struct wake_q_node {
 	struct wake_q_node *next;
 };
 
+struct kmap_ctrl {
+#ifdef CONFIG_KMAP_LOCAL
+	int				idx;
+	pte_t				pteval[KM_MAX_IDX];
+#endif
+};
+
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/*
@@ -1294,6 +1302,7 @@ struct task_struct {
 	unsigned int			sequential_io;
 	unsigned int			sequential_io_avg;
 #endif
+	struct kmap_ctrl		kmap_ctrl;
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long			task_state_change;
 #endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -2,6 +2,7 @@
 
 #include <linux/context_tracking.h>
 #include <linux/entry-common.h>
+#include <linux/highmem.h>
 #include <linux/livepatch.h>
 #include <linux/audit.h>
 
@@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st
 
 	/* Ensure that the address limit is intact and no locks are held */
 	addr_limit_user_check();
+	kmap_assert_nomap();
 	lockdep_assert_irqs_disabled();
 	lockdep_sys_exit();
 }
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru
 	account_kernel_stack(tsk, 1);
 
 	kcov_task_init(tsk);
+	kmap_local_fork(tsk);
 
 #ifdef CONFIG_FAULT_INJECTION
 	tsk->fail_nth = 0;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4053,6 +4053,22 @@ static inline void finish_lock_switch(st
 # define finish_arch_post_lock_switch()	do { } while (0)
 #endif
 
+static inline void kmap_local_sched_out(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+	if (unlikely(current->kmap_ctrl.idx))
+		__kmap_local_sched_out();
+#endif
+}
+
+static inline void kmap_local_sched_in(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+	if (unlikely(current->kmap_ctrl.idx))
+		__kmap_local_sched_in();
+#endif
+}
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -4075,6 +4091,7 @@ prepare_task_switch(struct rq *rq, struc
 	perf_event_task_sched_out(prev, next);
 	rseq_preempt(prev);
 	fire_sched_out_preempt_notifiers(prev, next);
+	kmap_local_sched_out();
 	prepare_task(next);
 	prepare_arch_switch(next);
 }
@@ -4141,6 +4158,7 @@ static struct rq *finish_task_switch(str
 	finish_lock_switch(rq);
 	finish_arch_post_lock_switch();
 	kcov_finish_switch(current);
+	kmap_local_sched_in();
 
 	fire_sched_in_preempt_notifiers(current);
 	/*
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
 
 #include <asm/kmap_size.h>
 
-static DEFINE_PER_CPU(int, __kmap_local_idx);
-
 /*
  * With DEBUG_HIGHMEM the stack depth is doubled and every second
  * slot is unused which acts as a guard page
@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_
 
 static inline int kmap_local_idx_push(void)
 {
-	int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
-
 	WARN_ON_ONCE(in_irq() && !irqs_disabled());
-	BUG_ON(idx >= KM_MAX_IDX);
-	return idx;
+	current->kmap_ctrl.idx += KM_INCR;
+	BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
+	return current->kmap_ctrl.idx - 1;
 }
 
 static inline int kmap_local_idx(void)
 {
-	return __this_cpu_read(__kmap_local_idx) - 1;
+	return current->kmap_ctrl.idx - 1;
 }
 
 static inline void kmap_local_idx_pop(void)
 {
-	int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR);
-
-	BUG_ON(idx < 0);
+	current->kmap_ctrl.idx -= KM_INCR;
+	BUG_ON(current->kmap_ctrl.idx < 0);
 }
 
 #ifndef arch_kmap_local_post_map
@@ -461,6 +457,7 @@ void *__kmap_local_pfn_prot(unsigned lon
 	pteval = pfn_pte(pfn, prot);
 	set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
 	arch_kmap_local_post_map(vaddr, pteval);
+	current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
 	preempt_enable();
 
 	return (void *)vaddr;
@@ -505,10 +502,92 @@ void kunmap_local_indexed(void *vaddr)
 	arch_kmap_local_pre_unmap(addr);
 	pte_clear(&init_mm, addr, kmap_pte - idx);
 	arch_kmap_local_post_unmap(addr);
+	current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
 	kmap_local_idx_pop();
 	preempt_enable();
 }
 EXPORT_SYMBOL(kunmap_local_indexed);
+
+/*
+ * Invoked before switch_to(). This is safe even if an interrupt which
+ * needs a kmap_local happens during or after clearing the maps, because
+ * task::kmap_ctrl.idx is not modified by the unmapping code. A nested
+ * kmap_local will therefore use the next unused index and restore the
+ * index on unmap. The already cleared kmaps of the outgoing task are
+ * irrelevant because the interrupt context does not know about them. The
+ * same applies when scheduling back in for an interrupt which happens
+ * before the restore is complete.
+ */
+void __kmap_local_sched_out(void)
+{
+	struct task_struct *tsk = current;
+	pte_t *kmap_pte = kmap_get_pte();
+	int i;
+
+	/* Clear kmaps */
+	for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+		pte_t pteval = tsk->kmap_ctrl.pteval[i];
+		unsigned long addr;
+		int idx;
+
+		/* With debug all even slots are unmapped and act as guard */
+		if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+			WARN_ON_ONCE(!pte_none(pteval));
+			continue;
+		}
+		if (WARN_ON_ONCE(pte_none(pteval)))
+			continue;
+
+		/*
+		 * This is a horrible hack for XTENSA to calculate the
+		 * coloured PTE index. Uses the PFN encoded into the pteval
+		 * and the map index calculation because the actual mapped
+		 * virtual address is not stored in task::kmap_ctrl.
+		 * For any sane architecture this is optimized out.
+		 */
+		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+
+		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+		arch_kmap_local_pre_unmap(addr);
+		pte_clear(&init_mm, addr, kmap_pte - idx);
+		arch_kmap_local_post_unmap(addr);
+	}
+}
+
+void __kmap_local_sched_in(void)
+{
+	struct task_struct *tsk = current;
+	pte_t *kmap_pte = kmap_get_pte();
+	int i;
+
+	/* Restore kmaps */
+	for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+		pte_t pteval = tsk->kmap_ctrl.pteval[i];
+		unsigned long addr;
+		int idx;
+
+		/* With debug all even slots are unmapped and act as guard */
+		if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+			WARN_ON_ONCE(!pte_none(pteval));
+			continue;
+		}
+		if (WARN_ON_ONCE(pte_none(pteval)))
+			continue;
+
+		/* See comment in __kmap_local_sched_out() */
+		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+		set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+		arch_kmap_local_post_map(addr, pteval);
+	}
+}
+
+void kmap_local_fork(struct task_struct *tsk)
+{
+	if (WARN_ON_ONCE(tsk->kmap_ctrl.idx))
+		memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl));
+}
+
 #endif
 
 #if defined(HASHED_PAGE_VIRTUAL)

  parent reply	other threads:[~2020-11-03 13:52 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-03  9:27 [patch V3 00/37] mm/highmem: Preemptible variant of kmap_atomic & friends Thomas Gleixner
2020-11-03  9:27 ` [patch V3 01/37] mm/highmem: Un-EXPORT __kmap_atomic_idx() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 02/37] highmem: Remove unused functions Thomas Gleixner
2020-11-03  9:27 ` [patch V3 03/37] fs: Remove asm/kmap_types.h includes Thomas Gleixner
2020-11-03 11:12   ` David Sterba
2020-11-03  9:27 ` [patch V3 04/37] sh/highmem: Remove all traces of unused cruft Thomas Gleixner
2020-11-03  9:27 ` [patch V3 05/37] asm-generic: Provide kmap_size.h Thomas Gleixner
2020-11-03 12:25   ` Arnd Bergmann
2020-11-03  9:27 ` [patch V3 06/37] highmem: Provide generic variant of kmap_atomic* Thomas Gleixner
2020-11-03  9:27 ` [patch V3 07/37] highmem: Make DEBUG_HIGHMEM functional Thomas Gleixner
2020-11-03  9:27 ` [patch V3 08/37] x86/mm/highmem: Use generic kmap atomic implementation Thomas Gleixner
2020-11-03  9:27 ` [patch V3 09/37] arc/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 10/37] ARM: highmem: Switch to generic kmap atomic Thomas Gleixner
     [not found]   ` <CGME20201112081036eucas1p14e135a370d3bccab311727fd2e89f4df@eucas1p1.samsung.com>
2020-11-12  8:10     ` Marek Szyprowski
2020-11-12 11:03       ` Thomas Gleixner
2020-11-12 11:07       ` Sebastian Andrzej Siewior
2020-11-03  9:27 ` [patch V3 11/37] csky/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 12/37] microblaze/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 13/37] mips/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 14/37] nds32/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 15/37] powerpc/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 16/37] sparc/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 17/37] xtensa/mm/highmem: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 18/37] highmem: Get rid of kmap_types.h Thomas Gleixner
2020-11-03  9:27 ` [patch V3 19/37] mm/highmem: Remove the old kmap_atomic cruft Thomas Gleixner
2020-11-03  9:27 ` [patch V3 20/37] io-mapping: Cleanup atomic iomap Thomas Gleixner
2020-11-03  9:27 ` [patch V3 21/37] Documentation/io-mapping: Remove outdated blurb Thomas Gleixner
2020-11-03  9:27 ` [patch V3 22/37] highmem: High implementation details and document API Thomas Gleixner
2020-11-03 17:48   ` Linus Torvalds
2020-11-03 19:00     ` Thomas Gleixner
2020-11-03  9:27 ` [patch V3 23/37] sched: Make migrate_disable/enable() independent of RT Thomas Gleixner
2020-11-03  9:27 ` [patch V3 24/37] sched: highmem: Store local kmaps in task struct Thomas Gleixner
2020-11-03 13:49   ` Thomas Gleixner
2020-11-03 13:51   ` Thomas Gleixner [this message]
2020-11-03  9:27 ` [patch V3 25/37] mm/highmem: Provide kmap_local* Thomas Gleixner
2020-11-03  9:27 ` [patch V3 26/37] io-mapping: Provide iomap_local variant Thomas Gleixner
2020-11-03  9:27 ` [patch V3 27/37] x86/crashdump/32: Simplify copy_oldmem_page() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 28/37] mips/crashdump: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 29/37] ARM: mm: Replace kmap_atomic_pfn() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 30/37] highmem: Remove kmap_atomic_pfn() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 31/37] drm/ttm: Replace kmap_atomic() usage Thomas Gleixner
2020-11-03  9:27 ` [patch V3 32/37] drm/vmgfx: Replace kmap_atomic() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 33/37] highmem: Remove kmap_atomic_prot() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 34/37] drm/qxl: Replace io_mapping_map_atomic_wc() Thomas Gleixner
2020-11-03  9:27 ` [patch V3 35/37] drm/nouveau/device: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 36/37] drm/i915: " Thomas Gleixner
2020-11-03  9:27 ` [patch V3 37/37] io-mapping: Remove io_mapping_map_atomic_wc() Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877dr235wj.fsf@nanos.tec.linutronix.de \
    --to=tglx@linutronix.de \
    --cc=airlied@linux.ie \
    --cc=airlied@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=bcrl@kvack.org \
    --cc=benh@kernel.crashing.org \
    --cc=bigeasy@linutronix.de \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=bskeggs@redhat.com \
    --cc=chris@zankel.net \
    --cc=christian.koenig@amd.com \
    --cc=clm@fb.com \
    --cc=daniel@ffwll.ch \
    --cc=davem@davemloft.net \
    --cc=deanbo422@gmail.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=dsterba@suse.com \
    --cc=green.hu@gmail.com \
    --cc=hch@lst.de \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jani.nikula@linux.intel.com \
    --cc=jcmvbkbc@gmail.com \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=josef@toxicpanda.com \
    --cc=juri.lelli@redhat.com \
    --cc=kraxel@redhat.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-csky@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-graphics-maintainer@vmware.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-snps-arc@lists.infradead.org \
    --cc=linux-xtensa@linux-xtensa.org \
    --cc=linux@armlinux.org.uk \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=monstr@monstr.eu \
    --cc=mpe@ellerman.id.au \
    --cc=nickhu@andestech.com \
    --cc=nouveau@lists.freedesktop.org \
    --cc=paulmck@kernel.org \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=ray.huang@amd.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=rostedt@goodmis.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=spice-devel@lists.freedesktop.org \
    --cc=sroland@vmware.com \
    --cc=torvalds@linuxfoundation.org \
    --cc=tsbogend@alpha.franken.de \
    --cc=vgupta@synopsys.com \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).