* [PATCH 05/11 v2.1] kprobes/ftrace: Add recursion protection to the ftrace callback
From: Steven Rostedt @ 2020-11-04 18:46 UTC (permalink / raw)
To: Masami Hiramatsu
Cc: Peter Zijlstra, James E.J. Bottomley, Guo Ren, linux-csky,
H. Peter Anvin, Miroslav Benes, Ingo Molnar, linux-s390,
Helge Deller, x86, Anil S Keshavamurthy, Christian Borntraeger,
Naveen N. Rao, Petr Mladek, Vasily Gorbik, Heiko Carstens,
Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
linux-parisc, linux-kernel, Paul Mackerras, Andrew Morton,
linuxppc-dev, David S. Miller
In-Reply-To: <20201103202257.029364fd78492fd8efc360dc@kernel.org>
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
If a ftrace callback does not supply its own recursion protection and
does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will
make a helper trampoline to do so before calling the callback instead of
just calling the callback directly.
The default for ftrace_ops is going to change. It will expect that handlers
provide their own recursion protection, unless its ftrace_ops states
otherwise.
Link: https://lkml.kernel.org/r/20201028115613.140212174@goodmis.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-csky@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
arch/csky/kernel/probes/ftrace.c | 12 ++++++++++--
arch/parisc/kernel/ftrace.c | 16 +++++++++++++---
arch/powerpc/kernel/kprobes-ftrace.c | 11 ++++++++++-
arch/s390/kernel/ftrace.c | 16 +++++++++++++---
arch/x86/kernel/kprobes/ftrace.c | 12 ++++++++++--
5 files changed, 56 insertions(+), 11 deletions(-)
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 5264763d05be..5eb2604fdf71 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -13,16 +13,21 @@ int arch_check_ftrace_location(struct kprobe *p)
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
+ int bit;
bool lr_saver = false;
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- /* Preempt is disabled by ftrace */
+ bit = ftrace_test_recursion_trylock();
+ if (bit < 0)
+ return;
+
+ preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (!p) {
p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
if (unlikely(!p) || kprobe_disabled(p))
- return;
+ goto out;
lr_saver = true;
}
@@ -56,6 +61,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
*/
__this_cpu_write(current_kprobe, NULL);
}
+out:
+ preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 63e3ecb9da81..13d85042810a 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -207,14 +207,21 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
- struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
+ struct kprobe *p;
+ int bit;
- if (unlikely(!p) || kprobe_disabled(p))
+ bit = ftrace_test_recursion_trylock();
+ if (bit < 0)
return;
+ preempt_disable_notrace();
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto out;
+
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
- return;
+ goto out;
}
__this_cpu_write(current_kprobe, p);
@@ -235,6 +242,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
}
}
__this_cpu_write(current_kprobe, NULL);
+out:
+ preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..5df8d50c65ae 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -18,10 +18,16 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
+ int bit;
+ bit = ftrace_test_recursion_trylock();
+ if (bit < 0)
+ return;
+
+ preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
- return;
+ goto out;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
@@ -52,6 +58,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
*/
__this_cpu_write(current_kprobe, NULL);
}
+out:
+ preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index b388e87a08bf..8f31c726537a 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -201,14 +201,21 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
- struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
+ struct kprobe *p;
+ int bit;
- if (unlikely(!p) || kprobe_disabled(p))
+ bit = ftrace_test_recursion_trylock();
+ if (bit < 0)
return;
+ preempt_disable_notrace();
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto out;
+
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
- return;
+ goto out;
}
__this_cpu_write(current_kprobe, p);
@@ -228,6 +235,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
}
}
__this_cpu_write(current_kprobe, NULL);
+out:
+ preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 681a4b36e9bb..a40a6cdfcca3 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -18,11 +18,16 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
+ int bit;
- /* Preempt is disabled by ftrace */
+ bit = ftrace_test_recursion_trylock();
+ if (bit < 0)
+ return;
+
+ preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
- return;
+ goto out;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
@@ -52,6 +57,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
*/
__this_cpu_write(current_kprobe, NULL);
}
+out:
+ preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
--
2.25.4
^ permalink raw reply related
* Re: [PATCH 11/11 v2.2] ftrace: Add recording of functions that caused recursion
From: Steven Rostedt @ 2020-11-04 19:13 UTC (permalink / raw)
To: Petr Mladek
Cc: Anton Vorontsov, linux-doc, Peter Zijlstra,
Sebastian Andrzej Siewior, Kamalesh Babulal, James E.J. Bottomley,
Guo Ren, H. Peter Anvin, live-patching, Miroslav Benes,
Ingo Molnar, linux-s390, Joe Lawrence, Jonathan Corbet,
Mauro Carvalho Chehab, Helge Deller, x86, linux-csky,
Christian Borntraeger, Kees Cook, Vasily Gorbik, Heiko Carstens,
Jiri Kosina, Borislav Petkov, Josh Poimboeuf, Thomas Gleixner,
Tony Luck, linux-parisc, linux-kernel, Masami Hiramatsu,
Colin Cross, Paul Mackerras, Andrew Morton, linuxppc-dev
In-Reply-To: <20201103141043.GO20201@alley>
On Tue, 3 Nov 2020 15:10:43 +0100
Petr Mladek <pmladek@suse.com> wrote:
> BTW: What is actually the purpose of paranoid_test, please?
>
> It prevents nested ftrace_record_recursion() calls on the same CPU
> (recursion, nesting from IRQ, NMI context).
>
> Parallel calls from different CPUs are still possible:
>
> CPU0 CPU1
> if (!atomic_read(&paranoid_test)) if (!atomic_read(&paranoid_test))
> // passes // passes
> atomic_inc(&paranoid_test); atomic_inc(&paranoid_test);
>
>
> I do not see how a nested call could cause crash while a parallel
> one would be OK.
The problem was that I don't want something in the recording to get traced,
and have that detect the recursion, which would then call the recording, which
would also be traced and detect the recursion and that would call the
recording, which would also be traced and detect the recursion and that
would call the recording, which .... (BOOM!)
Anyway, I decided to simply use another bit to protect against that.
New patch:
From a1f2aae996506169f2561986656f898d067d398b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 29 Oct 2020 15:27:06 -0400
Subject: [PATCH] ftrace: Add recording of functions that caused recursion
This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file
"recursed_functions" all the functions that caused recursion while a
callback to the function tracer was running.
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Guo Ren <guoren@kernel.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-csky@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: live-patching@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
Documentation/trace/ftrace-uses.rst | 6 +-
arch/csky/kernel/probes/ftrace.c | 2 +-
arch/parisc/kernel/ftrace.c | 2 +-
arch/powerpc/kernel/kprobes-ftrace.c | 2 +-
arch/s390/kernel/ftrace.c | 2 +-
arch/x86/kernel/kprobes/ftrace.c | 2 +-
fs/pstore/ftrace.c | 2 +-
include/linux/trace_recursion.h | 29 +++-
kernel/livepatch/patch.c | 2 +-
kernel/trace/Kconfig | 25 +++
kernel/trace/Makefile | 1 +
kernel/trace/ftrace.c | 4 +-
kernel/trace/trace_event_perf.c | 2 +-
kernel/trace/trace_functions.c | 2 +-
kernel/trace/trace_output.c | 6 +-
kernel/trace/trace_output.h | 1 +
kernel/trace/trace_recursion_record.c | 236 ++++++++++++++++++++++++++
17 files changed, 306 insertions(+), 20 deletions(-)
create mode 100644 kernel/trace/trace_recursion_record.c
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 86cd14b8e126..5981d5691745 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -118,7 +118,7 @@ can help in this regard. If you start your code with:
int bit;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
@@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a
function that the callback is tracing. Note, on success,
ftrace_test_recursion_trylock() will disable preemption, and the
ftrace_test_recursion_unlock() will enable it again (if it was previously
-enabled).
+enabled). The instruction pointer (ip) and its parent (parent_ip) are passed to
+ftrace_test_recursion_trylock() to record where the recursion happened
+(if CONFIG_FTRACE_RECORD_RECURSION is set).
Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops
(as explained below), then a helper trampoline will be used to test
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 5eb2604fdf71..f30b179924ef 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 13d85042810a..1c5d3732bda2 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 5df8d50c65ae..fdfee39938ea 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
struct kprobe_ctlblk *kcb;
int bit;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(nip, parent_nip);
if (bit < 0)
return;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 8f31c726537a..657c1ab45408 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -204,7 +204,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index a40a6cdfcca3..954d930a7127 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
int bit;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 816210fc5d3a..adb0935eb062 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -41,7 +41,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
if (unlikely(oops_in_progress))
return;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index ac3d73484cb2..228cc56ed66e 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -91,6 +91,9 @@ enum {
* not be correct. Allow for a single recursion to cover this case.
*/
TRACE_TRANSITION_BIT,
+
+ /* Used to prevent recursion recording from recursing. */
+ TRACE_RECORD_RECURSION_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -142,7 +145,22 @@ static __always_inline int trace_get_context_bit(void)
pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
}
-static __always_inline int trace_test_and_set_recursion(int start, int max)
+#ifdef CONFIG_FTRACE_RECORD_RECURSION
+extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
+# define do_ftrace_record_recursion(ip, pip) \
+ do { \
+ if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
+ trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
+ ftrace_record_recursion(ip, pip); \
+ trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
+ } \
+ } while (0)
+#else
+# define do_ftrace_record_recursion(ip, pip) do { } while (0)
+#endif
+
+static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
+ int start, int max)
{
unsigned int val = current->trace_recursion;
int bit;
@@ -158,8 +176,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
* a switch between contexts. Allow for a single recursion.
*/
bit = TRACE_TRANSITION_BIT;
- if (trace_recursion_test(bit))
+ if (trace_recursion_test(bit)) {
+ do_ftrace_record_recursion(ip, pip);
return -1;
+ }
trace_recursion_set(bit);
barrier();
return bit + 1;
@@ -199,9 +219,10 @@ static __always_inline void trace_clear_recursion(int bit)
* Returns: -1 if a recursion happened.
* >= 0 if no recursion
*/
-static __always_inline int ftrace_test_recursion_trylock(void)
+static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
+ unsigned long parent_ip)
{
- return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
}
/**
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 15480bf3ce88..875c5dbbdd33 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
ops = container_of(fops, struct klp_ops, fops);
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (WARN_ON_ONCE(bit < 0))
return;
/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a4020c0b4508..9b11c096d139 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -727,6 +727,31 @@ config TRACE_EVAL_MAP_FILE
If unsure, say N.
+config FTRACE_RECORD_RECURSION
+ bool "Record functions that recurse in function tracing"
+ depends on FUNCTION_TRACER
+ help
+ All callbacks that attach to the function tracing have some sort
+ of protection against recursion. Even though the protection exists,
+ it adds overhead. This option will create a file in the tracefs
+ file system called "recursed_functions" that will list the functions
+ that triggered a recursion.
+
+ This will add more overhead to cases that have recursion.
+
+ If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+ int "Max number of recursed functions to record"
+ default 128
+ depends on FTRACE_RECORD_RECURSION
+ help
+ This defines the limit of number of functions that can be
+ listed in the "recursed_functions" file, that lists all
+ the functions that caused a recursion to happen.
+ This file can be reset, but the limit can not change in
+ size at runtime.
+
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
depends on GCOV_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39f2bba89b76..03aad2b5cd5e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6918,7 +6918,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
@@ -6993,7 +6993,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a2b9fddb8148..1b202e28dfaa 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -447,7 +447,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
if ((unsigned long)ops->private != smp_processor_id())
return;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 89c414ce1388..646eda6c44a5 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -141,7 +141,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
if (unlikely(!tr->function_enabled))
return;
- bit = ftrace_test_recursion_trylock();
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 000e9dc224c6..92b1575ae0ca 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name)
}
#endif /* CONFIG_KRETPROBES */
-static void
-seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
+void
+trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
@@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
goto out;
}
- seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
+ trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
if (sym_flags & TRACE_ITER_SYM_ADDR)
trace_seq_printf(s, " <" IP_FMT ">", ip);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 2f742b74e7e6..4c954636caf0 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -16,6 +16,7 @@ extern int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
+extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
new file mode 100644
index 000000000000..b2edac1fe156
--- /dev/null
+++ b/kernel/trace/trace_recursion_record.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+
+struct recursed_functions {
+ unsigned long ip;
+ unsigned long parent_ip;
+};
+
+static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
+static atomic_t nr_records;
+
+/*
+ * Cache the last found function. Yes, updates to this are racy, but
+ * so is memory cache ;-)
+ */
+static unsigned long cached_function;
+
+void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
+{
+ int index = 0;
+ int i;
+ unsigned long old;
+
+ again:
+ /* First check the last one recorded */
+ if (ip == cached_function)
+ return;
+
+ i = atomic_read(&nr_records);
+ /* nr_records is -1 when clearing records */
+ smp_mb__after_atomic();
+ if (i < 0)
+ return;
+
+ /*
+ * If there's two writers and this writer comes in second,
+ * the cmpxchg() below to update the ip will fail. Then this
+ * writer will try again. It is possible that index will now
+ * be greater than nr_records. This is because the writer
+ * that succeeded has not updated the nr_records yet.
+ * This writer could keep trying again until the other writer
+ * updates nr_records. But if the other writer takes an
+ * interrupt, and that interrupt locks up that CPU, we do
+ * not want this CPU to lock up due to the recursion protection,
+ * and have a bug report showing this CPU as the cause of
+ * locking up the computer. To not lose this record, this
+ * writer will simply use the next position to update the
+ * recursed_functions, and it will update the nr_records
+ * accordingly.
+ */
+ if (index < i)
+ index = i;
+ if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
+ return;
+
+ for (i = index - 1; i >= 0; i--) {
+ if (recursed_functions[i].ip == ip) {
+ cached_function = ip;
+ return;
+ }
+ }
+
+ cached_function = ip;
+
+ /*
+ * We only want to add a function if it hasn't been added before.
+ * Add to the current location before incrementing the count.
+ * If it fails to add, then increment the index (save in i)
+ * and try again.
+ */
+ old = cmpxchg(&recursed_functions[index].ip, 0, ip);
+ if (old != 0) {
+ /* Did something else already added this for us? */
+ if (old == ip)
+ return;
+ /* Try the next location (use i for the next index) */
+ index++;
+ goto again;
+ }
+
+ recursed_functions[index].parent_ip = parent_ip;
+
+ /*
+ * It's still possible that we could race with the clearing
+ * CPU0 CPU1
+ * ---- ----
+ * ip = func
+ * nr_records = -1;
+ * recursed_functions[0] = 0;
+ * i = -1
+ * if (i < 0)
+ * nr_records = 0;
+ * (new recursion detected)
+ * recursed_functions[0] = func
+ * cmpxchg(recursed_functions[0],
+ * func, 0)
+ *
+ * But the worse that could happen is that we get a zero in
+ * the recursed_functions array, and it's likely that "func" will
+ * be recorded again.
+ */
+ i = atomic_read(&nr_records);
+ smp_mb__after_atomic();
+ if (i < 0)
+ cmpxchg(&recursed_functions[index].ip, ip, 0);
+ else if (i <= index)
+ atomic_cmpxchg(&nr_records, i, index + 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_record_recursion);
+
+static DEFINE_MUTEX(recursed_function_lock);
+static struct trace_seq *tseq;
+
+static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos)
+{
+ void *ret = NULL;
+ int index;
+
+ mutex_lock(&recursed_function_lock);
+ index = atomic_read(&nr_records);
+ if (*pos < index) {
+ ret = &recursed_functions[*pos];
+ }
+
+ tseq = kzalloc(sizeof(*tseq), GFP_KERNEL);
+ if (!tseq)
+ return ERR_PTR(-ENOMEM);
+
+ trace_seq_init(tseq);
+
+ return ret;
+}
+
+static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int index;
+ int p;
+
+ index = atomic_read(&nr_records);
+ p = ++(*pos);
+
+ return p < index ? &recursed_functions[p] : NULL;
+}
+
+static void recursed_function_seq_stop(struct seq_file *m, void *v)
+{
+ kfree(tseq);
+ mutex_unlock(&recursed_function_lock);
+}
+
+static int recursed_function_seq_show(struct seq_file *m, void *v)
+{
+ struct recursed_functions *record = v;
+ int ret = 0;
+
+ if (record) {
+ trace_seq_print_sym(tseq, record->parent_ip, true);
+ trace_seq_puts(tseq, ":\t");
+ trace_seq_print_sym(tseq, record->ip, true);
+ trace_seq_putc(tseq, '\n');
+ ret = trace_print_seq(m, tseq);
+ }
+
+ return ret;
+}
+
+static const struct seq_operations recursed_function_seq_ops = {
+ .start = recursed_function_seq_start,
+ .next = recursed_function_seq_next,
+ .stop = recursed_function_seq_stop,
+ .show = recursed_function_seq_show
+};
+
+static int recursed_function_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ mutex_lock(&recursed_function_lock);
+ /* If this file was opened for write, then erase contents */
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ /* disable updating records */
+ atomic_set(&nr_records, -1);
+ smp_mb__after_atomic();
+ memset(recursed_functions, 0, sizeof(recursed_functions));
+ smp_wmb();
+ /* enable them again */
+ atomic_set(&nr_records, 0);
+ }
+ if (file->f_mode & FMODE_READ)
+ ret = seq_open(file, &recursed_function_seq_ops);
+ mutex_unlock(&recursed_function_lock);
+
+ return ret;
+}
+
+static ssize_t recursed_function_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static int recursed_function_release(struct inode *inode, struct file *file)
+{
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+ return 0;
+}
+
+static const struct file_operations recursed_functions_fops = {
+ .open = recursed_function_open,
+ .write = recursed_function_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = recursed_function_release,
+};
+
+__init static int create_recursed_functions(void)
+{
+ struct dentry *dentry;
+
+ dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
+ &recursed_functions_fops);
+ if (!dentry)
+ pr_warn("WARNING: Failed to create recursed_functions\n");
+ return 0;
+}
+
+fs_initcall(create_recursed_functions);
--
2.25.4
^ permalink raw reply related
* [PATCH 00/36] Rid W=1 issues from TTY
From: Lee Jones @ 2020-11-04 19:35 UTC (permalink / raw)
To: lee.jones
Cc: Robert Love, Nick Holloway, Russ Gorby, C. Scott Ananian, --,
Andrew Morton, Laxman Dewangan, Paul Mackerras, David A. Hinds,
linux-riscv, Jiri Slaby, linux-stm32, Bill Hawes, Roland Stigge,
Rob Herring, Russell King, Michal Simek, Jonathan Hunter,
Jan Dumon, Andy Gross, linux-serial, Sylvain Lemieux,
Gerald Baeza, Sumit Semwal, Marko Kohtala, linux-media,
Philipp Zabel, Alexandre Torgue, linux-arm-msm,
Vladimir Zapolskiy, linaro-mm-sig, Stanislav Voronyi, dri-devel,
Paul Walmsley, linux-tegra, Bjorn Andersson, Andrew J. Kroll,
processes-Sapan Bhatia, Kevin Wells, Miloslav Trmac, Mike Hudson,
Joseph Barrow, linux-kernel, paulkf, Filip Aben, Palmer Dabbelt,
Maxime Coquelin, Greg Kroah-Hartman, Thierry Reding,
Colin Ian King, Jakub Jelinek, linuxppc-dev, Christian König,
Russell King
This set is part of a larger effort attempting to clean-up W=1
kernel builds, which are currently overwhelmingly riddled with
niggly little warnings.
Lee Jones (36):
tty: serdev: core: Remove unused variable 'dummy'
tty: serdev: core: Provide missing description for 'owner'
tty: tty_baudrate: Add missing description for 'tty'
tty: tty_io: Move 'tty_sysctl_init's prototype to shared space
tty: tty_buffer: Add missing description for 'limit'
tty: tty_port: Demote obvious abuse of kernel-doc formatting
tty: tty_jobctrl: Add missing function parameter descriptions
tty: tty_ldisc: Fix some kernel-doc related misdemeanours
tty: vt: consolemap: Demote weakly documented function header
tty: n_tty: Add 2 missing parameter descriptions
tty: serial: jsm: jsm_cls: Remove unused variable 'discard'
tty: tty_io: Fix some kernel-doc issues
tty: serial: 8250: 8250_port: Staticify functions referenced by
pointers
tty: serial: 8250: serial_cs: Remove unused/unchecked variable 'err'
tty: tty_audit: Demote non-conformant kernel-doc headers
tty: pty: Provide descriptions for the 'file' parameters
tty: serial: amba-pl011: Mark 'sbsa_uart_acpi_match' as __maybe_unused
tty: n_gsm: Demote obvious abuse of kernel-doc and supply other
missing docss
tty: serial: lpc32xx_hs: Remove unused variable 'tmp'
tty: serial: msm_serial: Remove set but unused variable 'status'
tty: serial: ifx6x60: Fix function documentation headers
tty: serial: xilinx_uartps: Supply description for missing member
'cts_override'
tty: synclink_gt: Demote one kernel-doc header and repair another
tty: serial: serial-tegra: Struct headers should start with 'struct
<name>'
tty: serial: sifive: Struct headers should start with 'struct <name>'
tty: synclinkmp: Add missing description for function param 'txqueue'
tty: synclinkmp: Mark never checked 'readval' as __always_unused
tty: serial: stm32-usart: Remove set but unused 'cookie' variables
tty: tty_ldisc: Supply missing description for 'tty_ldisc_get's 'tty'
param
tty: serial: serial-tegra: Provide some missing struct member
descriptions
powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to
shared location
tty: hvc: hvc_vio: Staticify function invoked only by reference
tty: hvc: hvc_opal: Staticify function invoked by reference
tty: serial: pmac_zilog: Make disposable variable __always_unused
tty: synclink: Mark disposable variables as __always_unused
tty: synclink: Provide missing description for 'hdlcdev_tx_timeout's
'txqueue' param
arch/powerpc/include/asm/hvconsole.h | 3 +++
arch/powerpc/platforms/pseries/pseries.h | 3 ---
arch/powerpc/platforms/pseries/setup.c | 1 +
drivers/tty/hvc/hvc_opal.c | 2 +-
drivers/tty/hvc/hvc_vio.c | 2 +-
drivers/tty/n_gsm.c | 4 +++-
drivers/tty/n_tty.c | 2 ++
drivers/tty/pty.c | 2 ++
drivers/tty/serdev/core.c | 6 ++----
drivers/tty/serial/8250/8250_port.c | 4 ++--
drivers/tty/serial/8250/serial_cs.c | 9 +++------
drivers/tty/serial/amba-pl011.c | 2 +-
drivers/tty/serial/ifx6x60.c | 3 ++-
drivers/tty/serial/jsm/jsm_cls.c | 4 +---
drivers/tty/serial/lpc32xx_hs.c | 3 +--
drivers/tty/serial/msm_serial.c | 3 +--
drivers/tty/serial/pmac_zilog.h | 2 +-
drivers/tty/serial/serial-tegra.c | 7 ++++++-
drivers/tty/serial/sifive.c | 3 ++-
drivers/tty/serial/stm32-usart.c | 6 ++----
drivers/tty/serial/xilinx_uartps.c | 1 +
drivers/tty/synclink.c | 5 +++--
drivers/tty/synclink_gt.c | 3 ++-
drivers/tty/synclinkmp.c | 3 ++-
drivers/tty/tty_audit.c | 12 ++++++------
drivers/tty/tty_baudrate.c | 1 +
drivers/tty/tty_buffer.c | 1 +
drivers/tty/tty_io.c | 12 +++++++-----
drivers/tty/tty_jobctrl.c | 3 +++
drivers/tty/tty_ldisc.c | 11 ++++++-----
drivers/tty/tty_port.c | 4 ++--
drivers/tty/vt/consolemap.c | 3 +--
include/linux/tty.h | 1 +
33 files changed, 73 insertions(+), 58 deletions(-)
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: "Andrew J. Kroll" <ag784@freenet.buffalo.edu>
Cc: Andrew Morton <andrewm@uow.edu.eu>
Cc: Andy Gross <agross@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bill Hawes <whawes@star.net>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: "C. Scott Ananian" <cananian@alumni.princeton.edu>
Cc: "David A. Hinds" <dahinds@users.sourceforge.net>
Cc: dri-devel@lists.freedesktop.org
Cc: Filip Aben <f.aben@option.com>
Cc: Gerald Baeza <gerald.baeza@st.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jakub Jelinek <jj@ultra.linux.cz>
Cc: Jan Dumon <j.dumon@option.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Jonathan Hunter <jonathanh@nvidia.com>
Cc: Joseph Barrow <d.barow@option.com>
Cc: -- <julian@uhunix.uhcc.hawaii.edu>
Cc: Kevin Wells <kevin.wells@nxp.com>
Cc: Laxman Dewangan <ldewangan@nvidia.com>
Cc: linaro-mm-sig@lists.linaro.org
Cc: linux-arm-msm@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-riscv@lists.infradead.org
Cc: linux-serial@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-tegra@vger.kernel.org
Cc: Marko Kohtala <Marko.Kohtala@hut.fi>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Mike Hudson <Exoray@isys.ca>
Cc: Miloslav Trmac <mitr@redhat.com>
Cc: Nick Holloway <alfie@dcs.warwick.ac.uk>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: paulkf@microgate.com
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: processes-Sapan Bhatia <sapan@corewars.org>
Cc: Robert Love <rlove@google.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Roland Stigge <stigge@antcom.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Russ Gorby <russ.gorby@intel.com>
Cc: Stanislav Voronyi <stas@cnti.uanet.kharkov.ua>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sylvain Lemieux <slemieux.tyco@gmail.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Vladimir Zapolskiy <vz@mleia.com>
--
2.25.1
^ permalink raw reply
* [PATCH 31/36] powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to shared location
From: Lee Jones @ 2020-11-04 19:35 UTC (permalink / raw)
To: lee.jones; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <20201104193549.4026187-1-lee.jones@linaro.org>
Fixes the following W=1 kernel build warning(s):
drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ [-Wmissing-prototypes]
385 | void __init hvc_vio_init_early(void)
| ^~~~~~~~~~~~~~~~~~
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
arch/powerpc/include/asm/hvconsole.h | 3 +++
arch/powerpc/platforms/pseries/pseries.h | 3 ---
arch/powerpc/platforms/pseries/setup.c | 1 +
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
index 999ed5ac90531..936a1ee1ac786 100644
--- a/arch/powerpc/include/asm/hvconsole.h
+++ b/arch/powerpc/include/asm/hvconsole.h
@@ -24,5 +24,8 @@
extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
+/* Provided by HVC VIO */
+extern void hvc_vio_init_early(void);
+
#endif /* __KERNEL__ */
#endif /* _PPC64_HVCONSOLE_H */
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 13fa370a87e4e..7be5b054dfc36 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
/* Poweron flag used for enabling auto ups restart */
extern unsigned long rtas_poweron_auto;
-/* Provided by HVC VIO */
-extern void hvc_vio_init_early(void);
-
/* Dynamic logical Partitioning/Mobility */
extern void dlpar_free_cc_nodes(struct device_node *);
extern void dlpar_free_cc_property(struct property *);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 633c45ec406da..6999b83f06612 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
#include <asm/swiotlb.h>
#include <asm/svm.h>
#include <asm/dtl.h>
+#include <asm/hvconsole.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
--
2.25.1
^ permalink raw reply related
* [PATCH 32/36] tty: hvc: hvc_vio: Staticify function invoked only by reference
From: Lee Jones @ 2020-11-04 19:35 UTC (permalink / raw)
To: lee.jones; +Cc: Greg Kroah-Hartman, linuxppc-dev, Jiri Slaby, linux-kernel
In-Reply-To: <20201104193549.4026187-1-lee.jones@linaro.org>
Fixes the following W=1 kernel build warning(s):
drivers/tty/hvc/hvc_vio.c:181:6: warning: no previous prototype for ‘hvterm_hvsi_hangup’ [-Wmissing-prototypes]
181 | void hvterm_hvsi_hangup(struct hvc_struct *hp, int data)
| ^~~~~~~~~~~~~~~~~~
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
drivers/tty/hvc/hvc_vio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index 7af54d6ed5b84..798f27f40cc2d 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -178,7 +178,7 @@ static void hvterm_hvsi_close(struct hvc_struct *hp, int data)
notifier_del_irq(hp, data);
}
-void hvterm_hvsi_hangup(struct hvc_struct *hp, int data)
+static void hvterm_hvsi_hangup(struct hvc_struct *hp, int data)
{
struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
--
2.25.1
^ permalink raw reply related
* [PATCH 34/36] tty: serial: pmac_zilog: Make disposable variable __always_unused
From: Lee Jones @ 2020-11-04 19:35 UTC (permalink / raw)
To: lee.jones
Cc: Greg Kroah-Hartman, linuxppc-dev, linux-kernel, Paul Mackerras,
linux-serial, Jiri Slaby
In-Reply-To: <20201104193549.4026187-1-lee.jones@linaro.org>
Fixes the following W=1 kernel build warning(s):
drivers/tty/serial/pmac_zilog.h:365:58: warning: variable ‘garbage’ set but not used [-Wunused-but-set-variable]
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linux-serial@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
drivers/tty/serial/pmac_zilog.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/pmac_zilog.h b/drivers/tty/serial/pmac_zilog.h
index bb874e76810e0..968aec7c1cf82 100644
--- a/drivers/tty/serial/pmac_zilog.h
+++ b/drivers/tty/serial/pmac_zilog.h
@@ -362,7 +362,7 @@ static inline void zssync(struct uart_pmac_port *port)
/* Misc macros */
#define ZS_CLEARERR(port) (write_zsreg(port, 0, ERR_RES))
-#define ZS_CLEARFIFO(port) do { volatile unsigned char garbage; \
+#define ZS_CLEARFIFO(port) do { volatile unsigned char __always_unused garbage; \
garbage = read_zsdata(port); \
garbage = read_zsdata(port); \
garbage = read_zsdata(port); \
--
2.25.1
^ permalink raw reply related
* [PATCH 33/36] tty: hvc: hvc_opal: Staticify function invoked by reference
From: Lee Jones @ 2020-11-04 19:35 UTC (permalink / raw)
To: lee.jones
Cc: linuxppc-dev, linux-kernel, Paul Mackerras, Greg Kroah-Hartman,
Jiri Slaby
In-Reply-To: <20201104193549.4026187-1-lee.jones@linaro.org>
Fixes the following W=1 kernel build warning(s):
drivers/tty/hvc/hvc_opal.c:106:6: warning: no previous prototype for ‘hvc_opal_hvsi_hangup’ [-Wmissing-prototypes]
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
drivers/tty/hvc/hvc_opal.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index c66412566efce..056ae21a51214 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -103,7 +103,7 @@ static void hvc_opal_hvsi_close(struct hvc_struct *hp, int data)
notifier_del_irq(hp, data);
}
-void hvc_opal_hvsi_hangup(struct hvc_struct *hp, int data)
+static void hvc_opal_hvsi_hangup(struct hvc_struct *hp, int data)
{
struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
--
2.25.1
^ permalink raw reply related
* Re: [PATCH v3 1/2] ASoC: dt-bindings: fsl_aud2htx: Add binding doc for aud2htx module
From: Rob Herring @ 2020-11-04 22:34 UTC (permalink / raw)
To: Shengjiu Wang
Cc: devicetree, alsa-devel, timur, Xiubo.Lee, festevam, lgirdwood,
robh+dt, tiwai, nicoleotsuka, broonie, perex, linuxppc-dev,
linux-kernel
In-Reply-To: <1604281947-26874-1-git-send-email-shengjiu.wang@nxp.com>
On Mon, 02 Nov 2020 09:52:26 +0800, Shengjiu Wang wrote:
> AUD2HTX (Audio Subsystem TO HDMI TX Subsystem) is a new
> IP module found on i.MX8MP.
>
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> ---
> changes in v3:
> - Add additionalProperties
>
> changes in v2:
> - fix indentation issue
> - remove nodename
>
> .../bindings/sound/fsl,aud2htx.yaml | 66 +++++++++++++++++++
> 1 file changed, 66 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
>
Reviewed-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-04 23:18 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #4 from Erhard F. (erhard_f@mailbox.org) ---
(In reply to Christophe Leroy from comment #3)
> Could you test whether CONFIG_KASAN works:
> - on 5.10-rc1 with that commit reverted ?
> - on 5.9 ?
KASAN works in both cases. I'll attach kernel dmesg and config.
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-04 23:19 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #5 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 293495
--> https://bugzilla.kernel.org/attachment.cgi?id=293495&action=edit
kernel .config (5.10-rc1 + KASAN, PowerMac G4 DP)
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-04 23:20 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #6 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 293497
--> https://bugzilla.kernel.org/attachment.cgi?id=293497&action=edit
dmesg (5.10-rc1 + patch reverted + KASAN, PowerMac G4 DP)
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-04 23:21 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #7 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 293499
--> https://bugzilla.kernel.org/attachment.cgi?id=293499&action=edit
kernel .config (5.9.3 + KASAN, PowerMac G4 DP)
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [Bug 209869] Kernel 5.10-rc1 fails to boot on a PowerMac G4 3,6 at an early stage
From: bugzilla-daemon @ 2020-11-04 23:21 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-209869-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=209869
--- Comment #8 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 293501
--> https://bugzilla.kernel.org/attachment.cgi?id=293501&action=edit
dmesg (5.9.3 + KASAN, PowerMac G4 DP)
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* Re: [PATCH 31/36] powerpc: asm: hvconsole: Move 'hvc_vio_init_early's prototype to shared location
From: Michael Ellerman @ 2020-11-04 23:36 UTC (permalink / raw)
To: Lee Jones, lee.jones; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <20201104193549.4026187-32-lee.jones@linaro.org>
Lee Jones <lee.jones@linaro.org> writes:
> Fixes the following W=1 kernel build warning(s):
>
> drivers/tty/hvc/hvc_vio.c:385:13: warning: no previous prototype for ‘hvc_vio_init_early’ [-Wmissing-prototypes]
> 385 | void __init hvc_vio_init_early(void)
> | ^~~~~~~~~~~~~~~~~~
>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
> arch/powerpc/include/asm/hvconsole.h | 3 +++
> arch/powerpc/platforms/pseries/pseries.h | 3 ---
> arch/powerpc/platforms/pseries/setup.c | 1 +
> 3 files changed, 4 insertions(+), 3 deletions(-)
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
> diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
> index 999ed5ac90531..936a1ee1ac786 100644
> --- a/arch/powerpc/include/asm/hvconsole.h
> +++ b/arch/powerpc/include/asm/hvconsole.h
> @@ -24,5 +24,8 @@
> extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
> extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
>
> +/* Provided by HVC VIO */
> +extern void hvc_vio_init_early(void);
extern isn't needed, but don't feel you need to respin just to drop it.
cheers
^ permalink raw reply
* Re: [PATCH v3 2/2] ASoC: fsl_aud2htx: Add aud2htx module driver
From: Nicolin Chen @ 2020-11-05 1:35 UTC (permalink / raw)
To: Shengjiu Wang
Cc: devicetree, alsa-devel, timur, Xiubo.Lee, lgirdwood, linuxppc-dev,
tiwai, robh+dt, perex, broonie, festevam, linux-kernel
In-Reply-To: <1604281947-26874-2-git-send-email-shengjiu.wang@nxp.com>
On Mon, Nov 02, 2020 at 09:52:27AM +0800, Shengjiu Wang wrote:
> The AUD2HTX is a digital module that provides a bridge between
> the Audio Subsystem and the HDMI RTX Subsystem. This module
> includes intermediate storage to queue SDMA transactions prior
> to being synchronized and passed to the HDMI RTX Subsystem over
> the Audio Link.
>
> The AUD2HTX contains a DMA request routed to the SDMA module.
> This DMA request is controlled based on the watermark level in
> the 32-entry sample buffer.
>
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Acked-by: Nicolin Chen <nicoleotsuka@gmail.com>
Despite some small comments inline.
> +static int fsl_aud2htx_dai_probe(struct snd_soc_dai *cpu_dai)
> +{
> + struct fsl_aud2htx *aud2htx = dev_get_drvdata(cpu_dai->dev);
> +
> + /* DMA request when number of entries < WTMK_LOW */
> + regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> + AUD2HTX_CTRE_DT_MASK, 0);
> +
> + /* Disable interrupts*/
> + regmap_update_bits(aud2htx->regmap, AUD2HTX_IRQ_MASK,
> + AUD2HTX_WM_HIGH_IRQ_MASK |
> + AUD2HTX_WM_LOW_IRQ_MASK |
> + AUD2HTX_OVF_MASK,
> + AUD2HTX_WM_HIGH_IRQ_MASK |
> + AUD2HTX_WM_LOW_IRQ_MASK |
> + AUD2HTX_OVF_MASK);
> +
> + /* Configure watermark */
> + regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> + AUD2HTX_CTRE_WL_MASK,
> + AUD2HTX_WTMK_LOW << AUD2HTX_CTRE_WL_SHIFT);
> + regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> + AUD2HTX_CTRE_WH_MASK,
> + AUD2HTX_WTMK_HIGH << AUD2HTX_CTRE_WH_SHIFT);
If there isn't a hard requirement from hardware, feels better to
combine all the writes to AUD2HTX_CTRL_EXT into one single MMIO.
> +static irqreturn_t fsl_aud2htx_isr(int irq, void *dev_id)
> +{
> + return IRQ_HANDLED;
Empty isr? Perhaps can drop the request_irq() at all?
> +static int fsl_aud2htx_probe(struct platform_device *pdev)
> +{
> + struct fsl_aud2htx *aud2htx;
> + struct resource *res;
> + void __iomem *regs;
> + int ret, irq;
> +
> + aud2htx = devm_kzalloc(&pdev->dev, sizeof(*aud2htx), GFP_KERNEL);
> + if (!aud2htx)
> + return -ENOMEM;
> +
> + aud2htx->pdev = pdev;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + regs = devm_ioremap_resource(&pdev->dev, res);
> + if (IS_ERR(regs)) {
> + dev_err(&pdev->dev, "failed ioremap\n");
> + return PTR_ERR(regs);
> + }
> +
> + aud2htx->regmap = devm_regmap_init_mmio(&pdev->dev, regs,
> + &fsl_aud2htx_regmap_config);
> + if (IS_ERR(aud2htx->regmap)) {
> + dev_err(&pdev->dev, "failed to init regmap");
> + return PTR_ERR(aud2htx->regmap);
> + }
> +
> + irq = platform_get_irq(pdev, 0);
> + if (irq < 0) {
> + dev_err(&pdev->dev, "no irq for node %s\n",
> + dev_name(&pdev->dev));
dev_err() already prints dev_name, so not necessary to print again.
^ permalink raw reply
* Re: [PATCH v1 4/4] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: Michael Ellerman @ 2020-11-05 2:40 UTC (permalink / raw)
To: David Hildenbrand, linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, Michal Hocko, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201029162718.29910-5-david@redhat.com>
David Hildenbrand <david@redhat.com> writes:
> Let's use alloc_contig_pages() for allocating memory and remove the
> linear mapping manually via arch_remove_linear_mapping(). Mark all pages
> PG_offline, such that they will definitely not get touched - e.g.,
> when hibernating. When freeing memory, try to revert what we did.
>
> The original idea was discussed in:
> https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com
>
> This is similar to CONFIG_DEBUG_PAGEALLOC handling on other
> architectures, whereby only single pages are unmapped from the linear
> mapping. Let's mimic what memory hot(un)plug would do with the linear
> mapping.
>
> We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies.
>
> Simple test under QEMU TCG (10GB RAM, single NUMA node):
>
> sh-5.0# mount -t debugfs none /sys/kernel/debug/
> sh-5.0# cat /sys/devices/system/memory/block_size_bytes
> 40000000
> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
> [ 71.052836][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
> sh-5.0# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable
> [ 75.424302][ T356] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages
> [ 75.430549][ T356] memtrace: Freed trace memory back on node 0
> [ 75.604520][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
> sh-5.0# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable
> [ 80.418835][ T356] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages
> [ 80.430493][ T356] memtrace: Freed trace memory back on node 0
> [ 80.433882][ T356] memtrace: Failed to allocate trace memory on node 0
> sh-5.0# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
> [ 91.920158][ T356] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
I gave this a quick spin on a real machine, seems to work OK.
I don't have the actual memtrace tools setup to do an actual trace, will
try and get someone to test that also.
One observation is that previously the memory was zeroed when enabling
the memtrace, whereas now it's not.
eg, before:
# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
10000000
whereas after:
# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000080 e0 fd 43 00 00 00 00 00 e0 fd 43 00 00 00 00 00 |..C.......C.....|
00000090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000830 98 bf 39 00 00 00 00 00 98 bf 39 00 00 00 00 00 |..9.......9.....|
00000840 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
000008a0 b0 c8 47 00 00 00 00 00 b0 c8 47 00 00 00 00 00 |..G.......G.....|
000008b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
...
0fffff70 78 53 49 7d 00 00 29 2e 88 00 92 41 01 00 49 39 |xSI}..)....A..I9|
0fffff80 b4 07 4a 7d 28 f8 00 7d 00 48 08 7c 0c 00 c2 40 |..J}(..}.H.|...@|
0fffff90 2d f9 40 7d f0 ff c2 40 b4 07 0a 7d 00 48 8a 7f |-.@}...@...}.H..|
0fffffa0 70 fe 9e 41 cc ff ff 4b 00 00 00 60 00 00 00 60 |p..A...K...`...`|
0fffffb0 01 00 00 48 00 00 00 60 00 00 a3 2f 0c fd 9e 40 |...H...`.../...@|
0fffffc0 00 00 a2 3c 00 00 a5 e8 00 00 62 3c 00 00 63 e8 |...<......b<..c.|
0fffffd0 01 00 20 39 83 02 80 38 00 00 3c 99 01 00 00 48 |.. 9...8..<....H|
0fffffe0 00 00 00 60 e4 fc ff 4b 00 00 80 38 78 fb e3 7f |...`...K...8x...|
0ffffff0 01 00 00 48 00 00 00 60 2c fe ff 4b 00 00 00 60 |...H...`,..K...`|
10000000
That's a nice way for root to read kernel memory, so we should probably
add a __GFP_ZERO or memset in there somewhere.
cheers
^ permalink raw reply
* [PATCH v2 2/8] powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Reuse the "safe" implementation from signal.c except for calling
unsafe_copy_from_user() to copy into a local buffer. Unlike the
unsafe_copy_{vsx,fpr}_to_user() functions the "copy from" functions
cannot use unsafe_get_user() directly to bypass the local buffer since
doing so significantly reduces signal handling performance.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal.h | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 2559a681536e..e9aaeac0da37 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -53,6 +53,33 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
&buf[i], label);\
} while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *__f = (u64 __user *)from; \
+ u64 buf[ELF_NFPREG]; \
+ int i; \
+ \
+ unsafe_copy_from_user(buf, __f, ELF_NFPREG * sizeof(double), \
+ label); \
+ for (i = 0; i < ELF_NFPREG - 1; i++) \
+ __t->thread.TS_FPR(i) = buf[i]; \
+ __t->thread.fp_state.fpscr = buf[i]; \
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *__f = (u64 __user *)from; \
+ u64 buf[ELF_NVSRHALFREG]; \
+ int i; \
+ \
+ unsafe_copy_from_user(buf, __f, \
+ ELF_NVSRHALFREG * sizeof(double), \
+ label); \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ __t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; \
+} while (0)
+
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
#define unsafe_copy_ckfpr_to_user(to, task, label) do { \
struct task_struct *__t = task; \
@@ -80,6 +107,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \
ELF_NFPREG * sizeof(double), label)
+#define unsafe_copy_fpr_from_user(task, from, label) \
+ unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \
+ ELF_NFPREG * sizeof(double), label)
+
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
{
@@ -115,6 +146,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
#else
#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
{
--
2.29.0
^ permalink raw reply related
* [PATCH v2 3/8] powerpc/signal64: Move non-inline functions out of setup_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
There are non-inline functions which get called in setup_sigcontext() to
save register state to the thread struct. Move these functions into a
separate prepare_setup_sigcontext() function so that
setup_sigcontext() can be refactored later into an "unsafe" version
which assumes an open uaccess window. Non-inline functions should be
avoided when uaccess is open.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 7df088b9ad0f..ece1f982dd05 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
}
#endif
+static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_region)
+{
+#ifdef CONFIG_ALTIVEC
+ /* save altivec registers */
+ if (tsk->thread.used_vr)
+ flush_altivec_to_thread(tsk);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+ flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+ if (tsk->thread.used_vsr && ctx_has_vsx_region)
+ flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
/*
* Set up the sigcontext for the signal frame.
*/
@@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
*/
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
- unsigned long vrsave;
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
@@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* save altivec registers */
if (tsk->thread.used_vr) {
- flush_altivec_to_thread(tsk);
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
33 * sizeof(vector128));
@@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- vrsave = 0;
- if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
- vrsave = mfspr(SPRN_VRSAVE);
- tsk->thread.vrsave = vrsave;
- }
-
- err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+ err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
#else /* CONFIG_ALTIVEC */
err |= __put_user(0, &sc->v_regs);
#endif /* CONFIG_ALTIVEC */
- flush_fp_to_thread(tsk);
/* copy fpr regs and fpscr */
err |= copy_fpr_to_user(&sc->fp_regs, tsk);
@@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
* VMX data.
*/
if (tsk->thread.used_vsr && ctx_has_vsx_region) {
- flush_vsx_to_thread(tsk);
v_regs += ELF_NVRREG;
err |= copy_vsx_to_user(v_regs, tsk);
/* set MSR_VSX in the MSR value in the frame to
@@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
ctx_has_vsx_region = 1;
if (old_ctx != NULL) {
+ prepare_setup_sigcontext(current, ctx_has_vsx_region);
if (!access_ok(old_ctx, ctx_size)
|| setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
ctx_has_vsx_region)
@@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#endif
{
err |= __put_user(0, &frame->uc.uc_link);
+ prepare_setup_sigcontext(tsk, 1);
err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
NULL, (unsigned long)ksig->ka.sa.sa_handler,
1);
--
2.29.0
^ permalink raw reply related
* [PATCH v2 4/8] powerpc/signal64: Remove TM ifdefery in middle of if/else block
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Similar to commit 1c32940f5220 ("powerpc/signal32: Remove ifdefery in
middle of if/else") for PPC32, remove the messy ifdef. Unlike PPC32, the
ifdef cannot be removed entirely since the uc_transact member of the
sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM=y.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ece1f982dd05..d3e9519b2e62 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
struct pt_regs *regs = current_pt_regs();
struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long msr;
-#endif
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -762,10 +760,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
* restore_tm_sigcontexts.
*/
regs->msr &= ~MSR_TS_MASK;
+#endif
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
@@ -778,9 +778,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
- } else
#endif
- {
+ } else {
/*
* Fall through, for non-TM restore
*
@@ -818,10 +817,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long newsp = 0;
long err = 0;
struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* Save the thread's msr before get_tm_stackpointer() changes it */
- unsigned long msr = regs->msr;
-#endif
+ unsigned long msr __maybe_unused = regs->msr;
frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
if (!access_ok(frame, sizeof(*frame)))
@@ -836,8 +833,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
@@ -847,9 +845,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler,
msr);
- } else
#endif
- {
+ } else {
err |= __put_user(0, &frame->uc.uc_link);
prepare_setup_sigcontext(tsk, 1);
err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
--
2.29.0
^ permalink raw reply related
* [PATCH v2 0/8] Improve signal performance on PPC64 with KUAP
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
As reported by Anton, there is a large penalty to signal handling
performance on radix systems using KUAP. The signal handling code
performs many user access operations, each of which needs to switch the
KUAP permissions bit to open and then close user access. This involves a
costly 'mtspr' operation [0].
There is existing work done on x86 and by Christophe Leroy for PPC32 to
instead open up user access in "blocks" using user_*_access_{begin,end}.
We can do the same in PPC64 to bring performance back up on KUAP-enabled
radix systems.
This series applies on top of Christophe Leroy's work for PPC32 [1] (I'm
sure patchwork won't be too happy about that).
The first two patches add some needed 'unsafe' versions of copy-from
functions. While these do not make use of asm-goto they still allow for
avoiding the repeated uaccess switches.
The third patch moves functions called by setup_sigcontext() into a new
prepare_setup_sigcontext() to simplify converting setup_sigcontext()
into an 'unsafe' version which assumes an open uaccess window later.
The fourth patch cleans up some of the Transactional Memory ifdef stuff
to simplify using uaccess blocks later.
The next two patches rewrite some of the signal64 helper functions to
be 'unsafe'. Finally, the last two patches update the main signal
handling functions to make use of the new 'unsafe' helpers and eliminate
some additional uaccess switching.
I used the will-it-scale signal1 benchmark to measure and compare
performance [2]. The below results are from a P9 Blackbird system. Note
that currently hash does not support KUAP and is therefore used as the
"baseline" comparison. Bigger numbers are better:
signal1_threads -t1 -s10
| | hash | radix |
| --------------- | ------ | ------ |
| linuxppc/next | 289014 | 158408 |
| unsafe-signal64 | 298506 | 253053 |
[0]: https://github.com/linuxppc/issues/issues/277
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=196278
[2]: https://github.com/antonblanchard/will-it-scale/blob/master/tests/signal1.c
v2: * Rebase on latest linuxppc/next + Christophe Leroy's PPC32
signal series
* Simplify/remove TM ifdefery similar to PPC32 series and clean
up the uaccess begin/end calls
* Isolate non-inline functions so they are not called when
uaccess window is open
Christopher M. Riedl (6):
powerpc/uaccess: Add unsafe_copy_from_user
powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
powerpc/signal64: Move non-inline functions out of setup_sigcontext()
powerpc/signal64: Remove TM ifdefery in middle of if/else block
powerpc/signal64: Replace setup_sigcontext() w/
unsafe_setup_sigcontext()
powerpc/signal64: Replace restore_sigcontext() w/
unsafe_restore_sigcontext()
Daniel Axtens (2):
powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess
switches
powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches
arch/powerpc/include/asm/uaccess.h | 28 ++--
arch/powerpc/kernel/signal.h | 33 ++++
arch/powerpc/kernel/signal_64.c | 239 ++++++++++++++++++-----------
3 files changed, 201 insertions(+), 99 deletions(-)
--
2.29.0
^ permalink raw reply
* [PATCH v2 8/8] powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches
From: Christopher M. Riedl @ 2020-11-05 5:17 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
From: Daniel Axtens <dja@axtens.net>
Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.
Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d17f2d5436d2..82e68a508e5c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -784,8 +784,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
regs->msr &= ~MSR_TS_MASK;
#endif
- if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+ if (!user_read_access_begin(uc, sizeof(*uc)))
goto badframe;
+
+ unsafe_get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR], badframe_block);
+
if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* We recheckpoint on return. */
@@ -793,10 +796,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
/* Trying to start TM on non TM system */
if (!cpu_has_feature(CPU_FTR_TM))
- goto badframe;
+ goto badframe_block;
+
+ unsafe_get_user(uc_transact, &uc->uc_link, badframe_block);
+
+ user_read_access_end();
- if (__get_user(uc_transact, &uc->uc_link))
- goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
@@ -815,12 +820,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
* causing a TM bad thing.
*/
current->thread.regs->msr &= ~MSR_TS_MASK;
- if (!user_read_access_begin(uc, sizeof(*uc)))
- return -EFAULT;
- if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
- user_read_access_end();
- goto badframe;
- }
+ unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+ badframe_block);
+
user_read_access_end();
}
@@ -830,6 +832,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_thread_flag(TIF_RESTOREALL);
return 0;
+badframe_block:
+ user_read_access_end();
badframe:
signal_fault(current, regs, "rt_sigreturn", uc);
--
2.29.0
^ permalink raw reply related
* [PATCH v2 6/8] powerpc/signal64: Replace restore_sigcontext() w/ unsafe_restore_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Previously restore_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.
Rewrite restore_sigcontext() to assume that a userspace read access
window is open. Replace all uaccess functions with their 'unsafe'
versions which avoid the repeated uaccess switches.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 68 ++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3f25309826b6..d72153825719 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -326,14 +326,14 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
/*
* Restore the sigcontext from the signal frame.
*/
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
- struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, e) \
+ unsafe_op_wrap(__unsafe_restore_sigcontext(tsk, set, sig, sc), e)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+ int sig, struct sigcontext __user *sc)
{
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs;
#endif
- unsigned long err = 0;
unsigned long save_r13 = 0;
unsigned long msr;
struct pt_regs *regs = tsk->thread.regs;
@@ -348,27 +348,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
save_r13 = regs->gpr[13];
/* copy the GPRs */
- err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
- err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+ unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr),
+ efault_out);
+ unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
/* get MSR separately, transfer the LE bit if doing signal return */
- err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
if (sig)
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
- err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
- err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
- err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
- err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
- err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+ unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+ unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+ unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+ unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+ unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
/* Don't allow userspace to set SOFTE */
set_trap_norestart(regs);
- err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
- err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
- err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+ unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+ unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+ unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
if (!sig)
regs->gpr[13] = save_r13;
if (set != NULL)
- err |= __get_user(set->sig[0], &sc->oldmask);
+ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
/*
* Force reload of FP/VEC.
@@ -378,29 +379,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
#ifdef CONFIG_ALTIVEC
- err |= __get_user(v_regs, &sc->v_regs);
- if (err)
- return err;
+ unsafe_get_user(v_regs, &sc->v_regs, efault_out);
if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && (msr & MSR_VEC) != 0) {
- err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
- 33 * sizeof(vector128));
+ unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+ 33 * sizeof(vector128), efault_out);
tsk->thread.used_vr = true;
} else if (tsk->thread.used_vr) {
memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
if (v_regs != NULL)
- err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33],
+ efault_out);
else
tsk->thread.vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+ unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -409,14 +409,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
*/
v_regs += ELF_NVRREG;
if ((msr & MSR_VSX) != 0) {
- err |= copy_vsx_from_user(tsk, v_regs);
+ unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
tsk->thread.used_vsr = true;
} else {
for (i = 0; i < 32 ; i++)
tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
#endif
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -701,8 +704,14 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
do_exit(SIGSEGV);
set_current_blocked(&set);
- if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+ if (!user_read_access_begin(new_ctx, ctx_size))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+ user_read_access_end();
do_exit(SIGSEGV);
+ }
+ user_read_access_end();
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
@@ -806,8 +815,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
* causing a TM bad thing.
*/
current->thread.regs->msr &= ~MSR_TS_MASK;
- if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+ if (!user_read_access_begin(uc, sizeof(*uc)))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
+ user_read_access_end();
goto badframe;
+ }
+ user_read_access_end();
}
if (restore_altstack(&uc->uc_stack))
--
2.29.0
^ permalink raw reply related
* [PATCH v2 1/8] powerpc/uaccess: Add unsafe_copy_from_user
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Implement raw_copy_from_user_allowed() which assumes that userspace read
access is open. Use this new function to implement raw_copy_from_user().
Finally, wrap the new function to follow the usual "unsafe_" convention
of taking a label argument. The new raw_copy_from_user_allowed() calls
__copy_tofrom_user() internally, but this is still safe to call in user
access blocks formed with user_*_access_begin()/user_*_access_end()
since asm functions are not instrumented for tracing.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index ef5bbb705c08..96b4abab4f5a 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif /* __powerpc64__ */
-static inline unsigned long raw_copy_from_user(void *to,
- const void __user *from, unsigned long n)
+static inline unsigned long
+raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
{
- unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- ret = 1;
+ unsigned long ret = 1;
switch (n) {
case 1:
barrier_nospec();
- __get_user_size(*(u8 *)to, from, 1, ret);
+ __get_user_size_allowed(*(u8 *)to, from, 1, ret);
break;
case 2:
barrier_nospec();
- __get_user_size(*(u16 *)to, from, 2, ret);
+ __get_user_size_allowed(*(u16 *)to, from, 2, ret);
break;
case 4:
barrier_nospec();
- __get_user_size(*(u32 *)to, from, 4, ret);
+ __get_user_size_allowed(*(u32 *)to, from, 4, ret);
break;
case 8:
barrier_nospec();
- __get_user_size(*(u64 *)to, from, 8, ret);
+ __get_user_size_allowed(*(u64 *)to, from, 8, ret);
break;
}
if (ret == 0)
return 0;
}
+ return __copy_tofrom_user((__force void __user *)to, from, n);
+}
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ unsigned long ret;
+
barrier_nospec();
allow_read_from_user(from, n);
- ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ ret = raw_copy_from_user_allowed(to, from, n);
prevent_read_from_user(from, n);
return ret;
}
@@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
+#define unsafe_copy_from_user(d, s, l, e) \
+ unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
+
#define unsafe_copy_to_user(d, s, l, e) \
do { \
u8 __user *_dst = (u8 __user *)(d); \
--
2.29.0
^ permalink raw reply related
* [PATCH v2 5/8] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()
From: Christopher M. Riedl @ 2020-11-05 5:16 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
Previously setup_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.
Rewrite setup_sigcontext() to assume that a userspace write access window
is open. Replace all uaccess functions with their 'unsafe' versions
which avoid the repeated uaccess switches.
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 70 ++++++++++++++++++++-------------
1 file changed, 43 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d3e9519b2e62..3f25309826b6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
* Set up the sigcontext for the signal frame.
*/
-static long setup_sigcontext(struct sigcontext __user *sc,
- struct task_struct *tsk, int signr, sigset_t *set,
- unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, \
+ ctx_has_vsx_region, e) \
+ unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set, \
+ handler, ctx_has_vsx_region), e)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+ struct task_struct *tsk, int signr, sigset_t *set,
+ unsigned long handler, int ctx_has_vsx_region)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user *sc,
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
- long err = 0;
/* Force usr to alway see softe as 1 (interrupts enabled) */
unsigned long softe = 0x1;
BUG_ON(tsk != current);
#ifdef CONFIG_ALTIVEC
- err |= __put_user(v_regs, &sc->v_regs);
+ unsafe_put_user(v_regs, &sc->v_regs, efault_out);
/* save altivec registers */
if (tsk->thread.used_vr) {
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
- 33 * sizeof(vector128));
+ unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+ 33 * sizeof(vector128), efault_out);
/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
* contains valid data.
*/
@@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
#else /* CONFIG_ALTIVEC */
- err |= __put_user(0, &sc->v_regs);
+ unsafe_put_user(0, &sc->v_regs, efault_out);
#endif /* CONFIG_ALTIVEC */
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+ unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
*/
if (tsk->thread.used_vsr && ctx_has_vsx_region) {
v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, tsk);
+ unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_reg) contains valid data.
*/
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
- err |= __put_user(&sc->gp_regs, &sc->regs);
+ unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
WARN_ON(!FULL_REGS(regs));
- err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
- err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
- err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
- err |= __put_user(signr, &sc->signal);
- err |= __put_user(handler, &sc->handler);
+ unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+ unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+ unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+ unsafe_put_user(signr, &sc->signal, efault_out);
+ unsafe_put_user(handler, &sc->handler, efault_out);
if (set != NULL)
- err |= __put_user(set->sig[0], &sc->oldmask);
+ unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -664,12 +670,15 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (old_ctx != NULL) {
prepare_setup_sigcontext(current, ctx_has_vsx_region);
- if (!access_ok(old_ctx, ctx_size)
- || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
- ctx_has_vsx_region)
- || __copy_to_user(&old_ctx->uc_sigmask,
- &current->blocked, sizeof(sigset_t)))
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+
+ unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+ 0, ctx_has_vsx_region, efault_out);
+ unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+ sizeof(sigset_t), efault_out);
+
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
@@ -698,6 +707,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
return 0;
+
+efault_out:
+ user_write_access_end();
+ return -EFAULT;
}
@@ -849,9 +862,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
} else {
err |= __put_user(0, &frame->uc.uc_link);
prepare_setup_sigcontext(tsk, 1);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
- NULL, (unsigned long)ksig->ka.sa.sa_handler,
- 1);
+ if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+ return -EFAULT;
+ err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
+ ksig->sig, NULL,
+ (unsigned long)ksig->ka.sa.sa_handler, 1);
+ user_write_access_end();
}
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
--
2.29.0
^ permalink raw reply related
* [PATCH v2 7/8] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches
From: Christopher M. Riedl @ 2020-11-05 5:17 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20201105051701.25053-1-cmr@codefail.de>
From: Daniel Axtens <dja@axtens.net>
Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.
There is no 'unsafe' version of copy_siginfo_to_user, so move it
slightly to allow for a "longer" uaccess block.
Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 54 +++++++++++++++++++++------------
1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d72153825719..d17f2d5436d2 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -848,44 +848,51 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long msr __maybe_unused = regs->msr;
frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- if (err)
+ /* This only applies when calling unsafe_setup_sigcontext() and must be
+ * called before opening the uaccess window.
+ */
+ if (!MSR_TM_ACTIVE(msr))
+ prepare_setup_sigcontext(tsk, 1);
+
+ if (!user_write_access_begin(frame, sizeof(*frame)))
goto badframe;
+ unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+ unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+ unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
- err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+ unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+ user_write_access_end();
+
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler,
msr);
+
+ if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+ goto badframe;
+
#endif
} else {
- err |= __put_user(0, &frame->uc.uc_link);
- prepare_setup_sigcontext(tsk, 1);
- if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
- return -EFAULT;
- err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
- ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler, 1);
- user_write_access_end();
+ unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+ unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+ NULL, (unsigned long)ksig->ka.sa.sa_handler,
+ 1, badframe_block);
}
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto badframe;
+
+ unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+ user_write_access_end();
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
@@ -900,6 +907,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
regs->nip = (unsigned long) &frame->tramp[0];
}
+
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -939,6 +951,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
return 0;
+badframe_block:
+ user_write_access_end();
badframe:
signal_fault(current, regs, "handle_rt_signal64", frame);
--
2.29.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox