diff for duplicates of <20170420131154.GL3452@pathway.suse.cz> diff --git a/a/1.txt b/N1/1.txt index 0bc9383..9dcd4e7 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -8,7 +8,7 @@ On Thu 2017-04-20 12:31:12, Sergey Senozhatsky wrote: > > NMI context :-( > > > > [ 1295.168495] <...>-67423 10dNh1 382171111us : do_raw_spin_lock <-_raw_spin_lock -> > [ 1295.168495] <...>-67423 10dNh1 382171111us : sched_stat_runtime: comm=cc1 pid=67423 runtime=96858 [ns] vruntime=11924198270 [ns] +> > [ 1295.168495] <...>-67423 10dNh1 382171111us : sched_stat_runtime: commÌ1 pidg423 runtime–858 [ns] vruntime\x11924198270 [ns] > > [ 1295.168496] <...>-67423 10dNh1 382171111us : lock_acquire: ffffffff81c5c940 read rcu_read_lock > > [ 1295.168497] > > [ 1295.168498] Lost 4890096 message(s)! @@ -80,3 +80,200 @@ I have cooked up a patch based on this. It uses printk_deferred() in NMI when it is safe. Note that console_flush_on_panic() will try to get them on the console when a kdump is not generated. I believe that it will help Steven. + + +From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001 +From: Petr Mladek <pmladek@suse.com> +Date: Thu, 20 Apr 2017 10:52:31 +0200 +Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is + available + +The commit 42a0bb3f71383b457a7d ("printk/nmi: generic solution for safe +printk in NMI") caused that printk stores messages into a temporary +buffer in NMI context. + +The buffer is per-CPU and therefore the size is rather limited. +It works quite well for NMI backtraces. But there are longer logs +that might get printed in NMI context, for example, lockdep +warnings, ftrace_dump_on_oops. + +The temporary buffer is used to avoid deadlocks caused by +logbuf_lock. Also it is needed to avoid races with the other +temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered. +But the main buffer can be used in NMI if the lock is available +and we did not interrupt PRINTK_SAFE_CONTEXT. + +The lock is checked using raw_spin_is_locked(). It might cause +false negatives when the lock is taken on another CPU outside NMI. +For this reason, we do the check in printk_nmi_enter(). It makes +the handling consistent for the entire NMI handler and avoids +reshuffling of the messages. + +The patch also defines special printk context that allows +to use printk_deferred() in NMI. Note that we could not flush +the messages to the consoles because console drivers might use +many other internal locks. + +The newly created vprintk_deferred() disables the preemption +only around the irq work handling. It is needed there to keep +the consistency between the two per-CPU variables. But there +is no reason to disable preemption around vprintk_emit(). + +Finally, the patch patch puts back explicit serialization +of the NMI backtraces from different CPUs. It was removed +by the commit a9edc88093287183ac934b ("x86/nmi: Perform +a safe NMI stack trace on all CPUs"). It was not needed +because the flushing of the temporary per-CPU buffers +was serialized. + +Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> +Signed-off-by: Petr Mladek <pmladek@suse.com> +--- + kernel/printk/internal.h | 6 ++++-- + kernel/printk/printk.c | 19 ++++++++++++++----- + kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++-- + lib/nmi_backtrace.c | 3 +++ + 4 files changed, 44 insertions(+), 9 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 1db044f808b7..2a7d04049af4 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -18,12 +18,14 @@ + + #ifdef CONFIG_PRINTK + +-#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +-#define PRINTK_NMI_CONTEXT_MASK 0x80000000 ++#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff ++#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 ++#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + + extern raw_spinlock_t logbuf_lock; + + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); ++__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + __printf(1, 0) int vprintk_func(const char *fmt, va_list args); + void __printk_safe_enter(void); + void __printk_safe_exit(void); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 2984fb0f0257..16b519927d35 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2691,16 +2691,13 @@ void wake_up_klogd(void) + preempt_enable(); + } + +-int printk_deferred(const char *fmt, ...) ++int vprintk_deferred(const char *fmt, va_list args) + { +- va_list args; + int r; + +- preempt_disable(); +- va_start(args, fmt); + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); +- va_end(args); + ++ preempt_disable(); + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + preempt_enable(); +@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...) + return r; + } + ++int printk_deferred(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_deferred(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++ + /* + * printk rate limiting, lifted from the networking subsystem. + * +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 033e50a7d706..c3d165bcde42 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args) + + void printk_nmi_enter(void) + { +- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ /* ++ * The size of the extra per-CPU buffer is limited. Use it ++ * only when really needed. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK || ++ raw_spin_is_locked(&logbuf_lock)) { ++ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ } else { ++ this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); ++ } + } + + void printk_nmi_exit(void) + { +- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); ++ this_cpu_and(printk_context, ++ ~(PRINTK_NMI_CONTEXT_MASK || ++ PRINTK_NMI_DEFERRED_CONTEXT_MASK)); + } + + #else +@@ -351,12 +362,22 @@ void __printk_safe_exit(void) + + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) + { ++ /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + ++ /* Use extra buffer to prevent a recursion deadlock in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + ++ /* ++ * Use the main logbuf when logbuf_lock is available in NMI. ++ * But avoid calling console drivers that might have their own locks. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) ++ return vprintk_deferred(fmt, args); ++ ++ /* No obstacles. */ + return vprintk_default(fmt, args); + } + +diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c +index 4e8a30d1c22f..0bc0a3535a8a 100644 +--- a/lib/nmi_backtrace.c ++++ b/lib/nmi_backtrace.c +@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, + + bool nmi_cpu_backtrace(struct pt_regs *regs) + { ++ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { ++ arch_spin_lock(&lock); + if (regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", + cpu, instruction_pointer(regs)); +@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) + else + dump_stack(); + } ++ arch_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } +-- +1.8.5.6 diff --git a/a/content_digest b/N1/content_digest index b1863f3..7af0efd 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -4,32 +4,8 @@ "ref\020170420033112.GB542@jagdpanzerIV.localdomain\0" "From\0Petr Mladek <pmladek@suse.com>\0" "Subject\0Re: [PATCH v5 1/4] printk/nmi: generic solution for safe printk in NMI\0" - "Date\0Thu, 20 Apr 2017 15:11:54 +0200\0" - "To\0Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>\0" - "Cc\0Steven Rostedt <rostedt@goodmis.org>" - Andrew Morton <akpm@linux-foundation.org> - Peter Zijlstra <peterz@infradead.org> - Russell King <rmk+kernel@arm.linux.org.uk> - Daniel Thompson <daniel.thompson@linaro.org> - Jiri Kosina <jkosina@suse.com> - Ingo Molnar <mingo@redhat.com> - Thomas Gleixner <tglx@linutronix.de> - Chris Metcalf <cmetcalf@ezchip.com> - linux-kernel@vger.kernel.org - x86@kernel.org - linux-arm-kernel@lists.infradead.org - adi-buildroot-devel@lists.sourceforge.net - linux-cris-kernel@axis.com - linux-mips@linux-mips.org - linuxppc-dev@lists.ozlabs.org - linux-s390@vger.kernel.org - linux-sh@vger.kernel.org - sparclinux@vger.kernel.org - Jan Kara <jack@suse.cz> - Ralf Baechle <ralf@linux-mips.org> - Benjamin Herrenschmidt <benh@kernel.crashing.org> - Martin Schwidefsky <schwidefsky@de.ibm.com> - " David Miller <davem@davemloft.net>\0" + "Date\0Thu, 20 Apr 2017 13:11:54 +0000\0" + "To\0linux-arm-kernel@lists.infradead.org\0" "\00:1\0" "b\0" "On Thu 2017-04-20 12:31:12, Sergey Senozhatsky wrote:\n" @@ -42,7 +18,7 @@ "> > NMI context :-(\n" "> > \n" "> > [ 1295.168495] <...>-67423 10dNh1 382171111us : do_raw_spin_lock <-_raw_spin_lock\n" - "> > [ 1295.168495] <...>-67423 10dNh1 382171111us : sched_stat_runtime: comm=cc1 pid=67423 runtime=96858 [ns] vruntime=11924198270 [ns]\n" + "> > [ 1295.168495] <...>-67423 10dNh1 382171111us : sched_stat_runtime: comm\303\2141 pidg423 runtime\342\200\223858 [ns] vruntime\021924198270 [ns]\n" "> > [ 1295.168496] <...>-67423 10dNh1 382171111us : lock_acquire: ffffffff81c5c940 read rcu_read_lock\n" "> > [ 1295.168497]\n" "> > [ 1295.168498] Lost 4890096 message(s)!\n" @@ -113,6 +89,203 @@ "I have cooked up a patch based on this. It uses printk_deferred()\n" "in NMI when it is safe. Note that console_flush_on_panic() will\n" "try to get them on the console when a kdump is not generated.\n" - I believe that it will help Steven. + "I believe that it will help Steven.\n" + "\n" + "\n" + "From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001\n" + "From: Petr Mladek <pmladek@suse.com>\n" + "Date: Thu, 20 Apr 2017 10:52:31 +0200\n" + "Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is\n" + " available\n" + "\n" + "The commit 42a0bb3f71383b457a7d (\"printk/nmi: generic solution for safe\n" + "printk in NMI\") caused that printk stores messages into a temporary\n" + "buffer in NMI context.\n" + "\n" + "The buffer is per-CPU and therefore the size is rather limited.\n" + "It works quite well for NMI backtraces. But there are longer logs\n" + "that might get printed in NMI context, for example, lockdep\n" + "warnings, ftrace_dump_on_oops.\n" + "\n" + "The temporary buffer is used to avoid deadlocks caused by\n" + "logbuf_lock. Also it is needed to avoid races with the other\n" + "temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered.\n" + "But the main buffer can be used in NMI if the lock is available\n" + "and we did not interrupt PRINTK_SAFE_CONTEXT.\n" + "\n" + "The lock is checked using raw_spin_is_locked(). It might cause\n" + "false negatives when the lock is taken on another CPU outside NMI.\n" + "For this reason, we do the check in printk_nmi_enter(). It makes\n" + "the handling consistent for the entire NMI handler and avoids\n" + "reshuffling of the messages.\n" + "\n" + "The patch also defines special printk context that allows\n" + "to use printk_deferred() in NMI. Note that we could not flush\n" + "the messages to the consoles because console drivers might use\n" + "many other internal locks.\n" + "\n" + "The newly created vprintk_deferred() disables the preemption\n" + "only around the irq work handling. It is needed there to keep\n" + "the consistency between the two per-CPU variables. But there\n" + "is no reason to disable preemption around vprintk_emit().\n" + "\n" + "Finally, the patch patch puts back explicit serialization\n" + "of the NMI backtraces from different CPUs. It was removed\n" + "by the commit a9edc88093287183ac934b (\"x86/nmi: Perform\n" + "a safe NMI stack trace on all CPUs\"). It was not needed\n" + "because the flushing of the temporary per-CPU buffers\n" + "was serialized.\n" + "\n" + "Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>\n" + "Signed-off-by: Petr Mladek <pmladek@suse.com>\n" + "---\n" + " kernel/printk/internal.h | 6 ++++--\n" + " kernel/printk/printk.c | 19 ++++++++++++++-----\n" + " kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++--\n" + " lib/nmi_backtrace.c | 3 +++\n" + " 4 files changed, 44 insertions(+), 9 deletions(-)\n" + "\n" + "diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h\n" + "index 1db044f808b7..2a7d04049af4 100644\n" + "--- a/kernel/printk/internal.h\n" + "+++ b/kernel/printk/internal.h\n" + "@@ -18,12 +18,14 @@\n" + " \n" + " #ifdef CONFIG_PRINTK\n" + " \n" + "-#define PRINTK_SAFE_CONTEXT_MASK\t0x7fffffff\n" + "-#define PRINTK_NMI_CONTEXT_MASK\t0x80000000\n" + "+#define PRINTK_SAFE_CONTEXT_MASK\t 0x3fffffff\n" + "+#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000\n" + "+#define PRINTK_NMI_CONTEXT_MASK\t\t 0x80000000\n" + " \n" + " extern raw_spinlock_t logbuf_lock;\n" + " \n" + " __printf(1, 0) int vprintk_default(const char *fmt, va_list args);\n" + "+__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);\n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args);\n" + " void __printk_safe_enter(void);\n" + " void __printk_safe_exit(void);\n" + "diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c\n" + "index 2984fb0f0257..16b519927d35 100644\n" + "--- a/kernel/printk/printk.c\n" + "+++ b/kernel/printk/printk.c\n" + "@@ -2691,16 +2691,13 @@ void wake_up_klogd(void)\n" + " \tpreempt_enable();\n" + " }\n" + " \n" + "-int printk_deferred(const char *fmt, ...)\n" + "+int vprintk_deferred(const char *fmt, va_list args)\n" + " {\n" + "-\tva_list args;\n" + " \tint r;\n" + " \n" + "-\tpreempt_disable();\n" + "-\tva_start(args, fmt);\n" + " \tr = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);\n" + "-\tva_end(args);\n" + " \n" + "+\tpreempt_disable();\n" + " \t__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);\n" + " \tirq_work_queue(this_cpu_ptr(&wake_up_klogd_work));\n" + " \tpreempt_enable();\n" + "@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...)\n" + " \treturn r;\n" + " }\n" + " \n" + "+int printk_deferred(const char *fmt, ...)\n" + "+{\n" + "+\tva_list args;\n" + "+\tint r;\n" + "+\n" + "+\tva_start(args, fmt);\n" + "+\tr = vprintk_deferred(fmt, args);\n" + "+\tva_end(args);\n" + "+\n" + "+\treturn r;\n" + "+}\n" + "+\n" + " /*\n" + " * printk rate limiting, lifted from the networking subsystem.\n" + " *\n" + "diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c\n" + "index 033e50a7d706..c3d165bcde42 100644\n" + "--- a/kernel/printk/printk_safe.c\n" + "+++ b/kernel/printk/printk_safe.c\n" + "@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args)\n" + " \n" + " void printk_nmi_enter(void)\n" + " {\n" + "-\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t/*\n" + "+\t * The size of the extra per-CPU buffer is limited. Use it\n" + "+\t * only when really needed.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK ||\n" + "+\t raw_spin_is_locked(&logbuf_lock)) {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t} else {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);\n" + "+\t}\n" + " }\n" + " \n" + " void printk_nmi_exit(void)\n" + " {\n" + "-\tthis_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);\n" + "+\tthis_cpu_and(printk_context,\n" + "+\t\t ~(PRINTK_NMI_CONTEXT_MASK ||\n" + "+\t\t PRINTK_NMI_DEFERRED_CONTEXT_MASK));\n" + " }\n" + " \n" + " #else\n" + "@@ -351,12 +362,22 @@ void __printk_safe_exit(void)\n" + " \n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args)\n" + " {\n" + "+\t/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)\n" + " \t\treturn vprintk_nmi(fmt, args);\n" + " \n" + "+\t/* Use extra buffer to prevent a recursion deadlock in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)\n" + " \t\treturn vprintk_safe(fmt, args);\n" + " \n" + "+\t/*\n" + "+\t * Use the main logbuf when logbuf_lock is available in NMI.\n" + "+\t * But avoid calling console drivers that might have their own locks.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)\n" + "+\t\treturn vprintk_deferred(fmt, args);\n" + "+\n" + "+\t/* No obstacles. */\n" + " \treturn vprintk_default(fmt, args);\n" + " }\n" + " \n" + "diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c\n" + "index 4e8a30d1c22f..0bc0a3535a8a 100644\n" + "--- a/lib/nmi_backtrace.c\n" + "+++ b/lib/nmi_backtrace.c\n" + "@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,\n" + " \n" + " bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " {\n" + "+\tstatic arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;\n" + " \tint cpu = smp_processor_id();\n" + " \n" + " \tif (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {\n" + "+\t\tarch_spin_lock(&lock);\n" + " \t\tif (regs && cpu_in_idle(instruction_pointer(regs))) {\n" + " \t\t\tpr_warn(\"NMI backtrace for cpu %d skipped: idling at pc %#lx\\n\",\n" + " \t\t\t\tcpu, instruction_pointer(regs));\n" + "@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " \t\t\telse\n" + " \t\t\t\tdump_stack();\n" + " \t\t}\n" + "+\t\tarch_spin_unlock(&lock);\n" + " \t\tcpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));\n" + " \t\treturn true;\n" + " \t}\n" + "-- \n" + 1.8.5.6 -22062e0cf824608ca96c0b3e1448fae72938b7dfc4191e4eb21bfd86e06df865 +05bb401f72e2cb7dce64f3bf7ed5c97d74770822e6348f7aafe6ca6923072fbb
diff --git a/a/1.txt b/N2/1.txt index 0bc9383..a28a777 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -80,3 +80,200 @@ I have cooked up a patch based on this. It uses printk_deferred() in NMI when it is safe. Note that console_flush_on_panic() will try to get them on the console when a kdump is not generated. I believe that it will help Steven. + + +>From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001 +From: Petr Mladek <pmladek@suse.com> +Date: Thu, 20 Apr 2017 10:52:31 +0200 +Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is + available + +The commit 42a0bb3f71383b457a7d ("printk/nmi: generic solution for safe +printk in NMI") caused that printk stores messages into a temporary +buffer in NMI context. + +The buffer is per-CPU and therefore the size is rather limited. +It works quite well for NMI backtraces. But there are longer logs +that might get printed in NMI context, for example, lockdep +warnings, ftrace_dump_on_oops. + +The temporary buffer is used to avoid deadlocks caused by +logbuf_lock. Also it is needed to avoid races with the other +temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered. +But the main buffer can be used in NMI if the lock is available +and we did not interrupt PRINTK_SAFE_CONTEXT. + +The lock is checked using raw_spin_is_locked(). It might cause +false negatives when the lock is taken on another CPU outside NMI. +For this reason, we do the check in printk_nmi_enter(). It makes +the handling consistent for the entire NMI handler and avoids +reshuffling of the messages. + +The patch also defines special printk context that allows +to use printk_deferred() in NMI. Note that we could not flush +the messages to the consoles because console drivers might use +many other internal locks. + +The newly created vprintk_deferred() disables the preemption +only around the irq work handling. It is needed there to keep +the consistency between the two per-CPU variables. But there +is no reason to disable preemption around vprintk_emit(). + +Finally, the patch patch puts back explicit serialization +of the NMI backtraces from different CPUs. It was removed +by the commit a9edc88093287183ac934b ("x86/nmi: Perform +a safe NMI stack trace on all CPUs"). It was not needed +because the flushing of the temporary per-CPU buffers +was serialized. + +Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> +Signed-off-by: Petr Mladek <pmladek@suse.com> +--- + kernel/printk/internal.h | 6 ++++-- + kernel/printk/printk.c | 19 ++++++++++++++----- + kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++-- + lib/nmi_backtrace.c | 3 +++ + 4 files changed, 44 insertions(+), 9 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 1db044f808b7..2a7d04049af4 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -18,12 +18,14 @@ + + #ifdef CONFIG_PRINTK + +-#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +-#define PRINTK_NMI_CONTEXT_MASK 0x80000000 ++#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff ++#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 ++#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + + extern raw_spinlock_t logbuf_lock; + + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); ++__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + __printf(1, 0) int vprintk_func(const char *fmt, va_list args); + void __printk_safe_enter(void); + void __printk_safe_exit(void); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 2984fb0f0257..16b519927d35 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2691,16 +2691,13 @@ void wake_up_klogd(void) + preempt_enable(); + } + +-int printk_deferred(const char *fmt, ...) ++int vprintk_deferred(const char *fmt, va_list args) + { +- va_list args; + int r; + +- preempt_disable(); +- va_start(args, fmt); + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); +- va_end(args); + ++ preempt_disable(); + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + preempt_enable(); +@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...) + return r; + } + ++int printk_deferred(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_deferred(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++ + /* + * printk rate limiting, lifted from the networking subsystem. + * +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 033e50a7d706..c3d165bcde42 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args) + + void printk_nmi_enter(void) + { +- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ /* ++ * The size of the extra per-CPU buffer is limited. Use it ++ * only when really needed. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK || ++ raw_spin_is_locked(&logbuf_lock)) { ++ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ } else { ++ this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); ++ } + } + + void printk_nmi_exit(void) + { +- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); ++ this_cpu_and(printk_context, ++ ~(PRINTK_NMI_CONTEXT_MASK || ++ PRINTK_NMI_DEFERRED_CONTEXT_MASK)); + } + + #else +@@ -351,12 +362,22 @@ void __printk_safe_exit(void) + + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) + { ++ /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + ++ /* Use extra buffer to prevent a recursion deadlock in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + ++ /* ++ * Use the main logbuf when logbuf_lock is available in NMI. ++ * But avoid calling console drivers that might have their own locks. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) ++ return vprintk_deferred(fmt, args); ++ ++ /* No obstacles. */ + return vprintk_default(fmt, args); + } + +diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c +index 4e8a30d1c22f..0bc0a3535a8a 100644 +--- a/lib/nmi_backtrace.c ++++ b/lib/nmi_backtrace.c +@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, + + bool nmi_cpu_backtrace(struct pt_regs *regs) + { ++ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { ++ arch_spin_lock(&lock); + if (regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", + cpu, instruction_pointer(regs)); +@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) + else + dump_stack(); + } ++ arch_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } +-- +1.8.5.6 diff --git a/a/content_digest b/N2/content_digest index b1863f3..c3b2ecb 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -113,6 +113,203 @@ "I have cooked up a patch based on this. It uses printk_deferred()\n" "in NMI when it is safe. Note that console_flush_on_panic() will\n" "try to get them on the console when a kdump is not generated.\n" - I believe that it will help Steven. + "I believe that it will help Steven.\n" + "\n" + "\n" + ">From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001\n" + "From: Petr Mladek <pmladek@suse.com>\n" + "Date: Thu, 20 Apr 2017 10:52:31 +0200\n" + "Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is\n" + " available\n" + "\n" + "The commit 42a0bb3f71383b457a7d (\"printk/nmi: generic solution for safe\n" + "printk in NMI\") caused that printk stores messages into a temporary\n" + "buffer in NMI context.\n" + "\n" + "The buffer is per-CPU and therefore the size is rather limited.\n" + "It works quite well for NMI backtraces. But there are longer logs\n" + "that might get printed in NMI context, for example, lockdep\n" + "warnings, ftrace_dump_on_oops.\n" + "\n" + "The temporary buffer is used to avoid deadlocks caused by\n" + "logbuf_lock. Also it is needed to avoid races with the other\n" + "temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered.\n" + "But the main buffer can be used in NMI if the lock is available\n" + "and we did not interrupt PRINTK_SAFE_CONTEXT.\n" + "\n" + "The lock is checked using raw_spin_is_locked(). It might cause\n" + "false negatives when the lock is taken on another CPU outside NMI.\n" + "For this reason, we do the check in printk_nmi_enter(). It makes\n" + "the handling consistent for the entire NMI handler and avoids\n" + "reshuffling of the messages.\n" + "\n" + "The patch also defines special printk context that allows\n" + "to use printk_deferred() in NMI. Note that we could not flush\n" + "the messages to the consoles because console drivers might use\n" + "many other internal locks.\n" + "\n" + "The newly created vprintk_deferred() disables the preemption\n" + "only around the irq work handling. It is needed there to keep\n" + "the consistency between the two per-CPU variables. But there\n" + "is no reason to disable preemption around vprintk_emit().\n" + "\n" + "Finally, the patch patch puts back explicit serialization\n" + "of the NMI backtraces from different CPUs. It was removed\n" + "by the commit a9edc88093287183ac934b (\"x86/nmi: Perform\n" + "a safe NMI stack trace on all CPUs\"). It was not needed\n" + "because the flushing of the temporary per-CPU buffers\n" + "was serialized.\n" + "\n" + "Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>\n" + "Signed-off-by: Petr Mladek <pmladek@suse.com>\n" + "---\n" + " kernel/printk/internal.h | 6 ++++--\n" + " kernel/printk/printk.c | 19 ++++++++++++++-----\n" + " kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++--\n" + " lib/nmi_backtrace.c | 3 +++\n" + " 4 files changed, 44 insertions(+), 9 deletions(-)\n" + "\n" + "diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h\n" + "index 1db044f808b7..2a7d04049af4 100644\n" + "--- a/kernel/printk/internal.h\n" + "+++ b/kernel/printk/internal.h\n" + "@@ -18,12 +18,14 @@\n" + " \n" + " #ifdef CONFIG_PRINTK\n" + " \n" + "-#define PRINTK_SAFE_CONTEXT_MASK\t0x7fffffff\n" + "-#define PRINTK_NMI_CONTEXT_MASK\t0x80000000\n" + "+#define PRINTK_SAFE_CONTEXT_MASK\t 0x3fffffff\n" + "+#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000\n" + "+#define PRINTK_NMI_CONTEXT_MASK\t\t 0x80000000\n" + " \n" + " extern raw_spinlock_t logbuf_lock;\n" + " \n" + " __printf(1, 0) int vprintk_default(const char *fmt, va_list args);\n" + "+__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);\n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args);\n" + " void __printk_safe_enter(void);\n" + " void __printk_safe_exit(void);\n" + "diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c\n" + "index 2984fb0f0257..16b519927d35 100644\n" + "--- a/kernel/printk/printk.c\n" + "+++ b/kernel/printk/printk.c\n" + "@@ -2691,16 +2691,13 @@ void wake_up_klogd(void)\n" + " \tpreempt_enable();\n" + " }\n" + " \n" + "-int printk_deferred(const char *fmt, ...)\n" + "+int vprintk_deferred(const char *fmt, va_list args)\n" + " {\n" + "-\tva_list args;\n" + " \tint r;\n" + " \n" + "-\tpreempt_disable();\n" + "-\tva_start(args, fmt);\n" + " \tr = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);\n" + "-\tva_end(args);\n" + " \n" + "+\tpreempt_disable();\n" + " \t__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);\n" + " \tirq_work_queue(this_cpu_ptr(&wake_up_klogd_work));\n" + " \tpreempt_enable();\n" + "@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...)\n" + " \treturn r;\n" + " }\n" + " \n" + "+int printk_deferred(const char *fmt, ...)\n" + "+{\n" + "+\tva_list args;\n" + "+\tint r;\n" + "+\n" + "+\tva_start(args, fmt);\n" + "+\tr = vprintk_deferred(fmt, args);\n" + "+\tva_end(args);\n" + "+\n" + "+\treturn r;\n" + "+}\n" + "+\n" + " /*\n" + " * printk rate limiting, lifted from the networking subsystem.\n" + " *\n" + "diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c\n" + "index 033e50a7d706..c3d165bcde42 100644\n" + "--- a/kernel/printk/printk_safe.c\n" + "+++ b/kernel/printk/printk_safe.c\n" + "@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args)\n" + " \n" + " void printk_nmi_enter(void)\n" + " {\n" + "-\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t/*\n" + "+\t * The size of the extra per-CPU buffer is limited. Use it\n" + "+\t * only when really needed.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK ||\n" + "+\t raw_spin_is_locked(&logbuf_lock)) {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t} else {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);\n" + "+\t}\n" + " }\n" + " \n" + " void printk_nmi_exit(void)\n" + " {\n" + "-\tthis_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);\n" + "+\tthis_cpu_and(printk_context,\n" + "+\t\t ~(PRINTK_NMI_CONTEXT_MASK ||\n" + "+\t\t PRINTK_NMI_DEFERRED_CONTEXT_MASK));\n" + " }\n" + " \n" + " #else\n" + "@@ -351,12 +362,22 @@ void __printk_safe_exit(void)\n" + " \n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args)\n" + " {\n" + "+\t/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)\n" + " \t\treturn vprintk_nmi(fmt, args);\n" + " \n" + "+\t/* Use extra buffer to prevent a recursion deadlock in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)\n" + " \t\treturn vprintk_safe(fmt, args);\n" + " \n" + "+\t/*\n" + "+\t * Use the main logbuf when logbuf_lock is available in NMI.\n" + "+\t * But avoid calling console drivers that might have their own locks.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)\n" + "+\t\treturn vprintk_deferred(fmt, args);\n" + "+\n" + "+\t/* No obstacles. */\n" + " \treturn vprintk_default(fmt, args);\n" + " }\n" + " \n" + "diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c\n" + "index 4e8a30d1c22f..0bc0a3535a8a 100644\n" + "--- a/lib/nmi_backtrace.c\n" + "+++ b/lib/nmi_backtrace.c\n" + "@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,\n" + " \n" + " bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " {\n" + "+\tstatic arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;\n" + " \tint cpu = smp_processor_id();\n" + " \n" + " \tif (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {\n" + "+\t\tarch_spin_lock(&lock);\n" + " \t\tif (regs && cpu_in_idle(instruction_pointer(regs))) {\n" + " \t\t\tpr_warn(\"NMI backtrace for cpu %d skipped: idling at pc %#lx\\n\",\n" + " \t\t\t\tcpu, instruction_pointer(regs));\n" + "@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " \t\t\telse\n" + " \t\t\t\tdump_stack();\n" + " \t\t}\n" + "+\t\tarch_spin_unlock(&lock);\n" + " \t\tcpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));\n" + " \t\treturn true;\n" + " \t}\n" + "-- \n" + 1.8.5.6 -22062e0cf824608ca96c0b3e1448fae72938b7dfc4191e4eb21bfd86e06df865 +205606b0317d46c040ef6c1795996b7cc813498876b7cad8862c849a9fefc3ab
diff --git a/a/1.txt b/N3/1.txt index 0bc9383..a28a777 100644 --- a/a/1.txt +++ b/N3/1.txt @@ -80,3 +80,200 @@ I have cooked up a patch based on this. It uses printk_deferred() in NMI when it is safe. Note that console_flush_on_panic() will try to get them on the console when a kdump is not generated. I believe that it will help Steven. + + +>From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001 +From: Petr Mladek <pmladek@suse.com> +Date: Thu, 20 Apr 2017 10:52:31 +0200 +Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is + available + +The commit 42a0bb3f71383b457a7d ("printk/nmi: generic solution for safe +printk in NMI") caused that printk stores messages into a temporary +buffer in NMI context. + +The buffer is per-CPU and therefore the size is rather limited. +It works quite well for NMI backtraces. But there are longer logs +that might get printed in NMI context, for example, lockdep +warnings, ftrace_dump_on_oops. + +The temporary buffer is used to avoid deadlocks caused by +logbuf_lock. Also it is needed to avoid races with the other +temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered. +But the main buffer can be used in NMI if the lock is available +and we did not interrupt PRINTK_SAFE_CONTEXT. + +The lock is checked using raw_spin_is_locked(). It might cause +false negatives when the lock is taken on another CPU outside NMI. +For this reason, we do the check in printk_nmi_enter(). It makes +the handling consistent for the entire NMI handler and avoids +reshuffling of the messages. + +The patch also defines special printk context that allows +to use printk_deferred() in NMI. Note that we could not flush +the messages to the consoles because console drivers might use +many other internal locks. + +The newly created vprintk_deferred() disables the preemption +only around the irq work handling. It is needed there to keep +the consistency between the two per-CPU variables. But there +is no reason to disable preemption around vprintk_emit(). + +Finally, the patch patch puts back explicit serialization +of the NMI backtraces from different CPUs. It was removed +by the commit a9edc88093287183ac934b ("x86/nmi: Perform +a safe NMI stack trace on all CPUs"). It was not needed +because the flushing of the temporary per-CPU buffers +was serialized. + +Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> +Signed-off-by: Petr Mladek <pmladek@suse.com> +--- + kernel/printk/internal.h | 6 ++++-- + kernel/printk/printk.c | 19 ++++++++++++++----- + kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++-- + lib/nmi_backtrace.c | 3 +++ + 4 files changed, 44 insertions(+), 9 deletions(-) + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 1db044f808b7..2a7d04049af4 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -18,12 +18,14 @@ + + #ifdef CONFIG_PRINTK + +-#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +-#define PRINTK_NMI_CONTEXT_MASK 0x80000000 ++#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff ++#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 ++#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + + extern raw_spinlock_t logbuf_lock; + + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); ++__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + __printf(1, 0) int vprintk_func(const char *fmt, va_list args); + void __printk_safe_enter(void); + void __printk_safe_exit(void); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 2984fb0f0257..16b519927d35 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2691,16 +2691,13 @@ void wake_up_klogd(void) + preempt_enable(); + } + +-int printk_deferred(const char *fmt, ...) ++int vprintk_deferred(const char *fmt, va_list args) + { +- va_list args; + int r; + +- preempt_disable(); +- va_start(args, fmt); + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); +- va_end(args); + ++ preempt_disable(); + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + preempt_enable(); +@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...) + return r; + } + ++int printk_deferred(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_deferred(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++ + /* + * printk rate limiting, lifted from the networking subsystem. + * +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 033e50a7d706..c3d165bcde42 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args) + + void printk_nmi_enter(void) + { +- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ /* ++ * The size of the extra per-CPU buffer is limited. Use it ++ * only when really needed. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK || ++ raw_spin_is_locked(&logbuf_lock)) { ++ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); ++ } else { ++ this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); ++ } + } + + void printk_nmi_exit(void) + { +- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); ++ this_cpu_and(printk_context, ++ ~(PRINTK_NMI_CONTEXT_MASK || ++ PRINTK_NMI_DEFERRED_CONTEXT_MASK)); + } + + #else +@@ -351,12 +362,22 @@ void __printk_safe_exit(void) + + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) + { ++ /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + ++ /* Use extra buffer to prevent a recursion deadlock in safe mode. */ + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + ++ /* ++ * Use the main logbuf when logbuf_lock is available in NMI. ++ * But avoid calling console drivers that might have their own locks. ++ */ ++ if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) ++ return vprintk_deferred(fmt, args); ++ ++ /* No obstacles. */ + return vprintk_default(fmt, args); + } + +diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c +index 4e8a30d1c22f..0bc0a3535a8a 100644 +--- a/lib/nmi_backtrace.c ++++ b/lib/nmi_backtrace.c +@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, + + bool nmi_cpu_backtrace(struct pt_regs *regs) + { ++ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { ++ arch_spin_lock(&lock); + if (regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", + cpu, instruction_pointer(regs)); +@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) + else + dump_stack(); + } ++ arch_spin_unlock(&lock); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } +-- +1.8.5.6 diff --git a/a/content_digest b/N3/content_digest index b1863f3..f6f792a 100644 --- a/a/content_digest +++ b/N3/content_digest @@ -2,34 +2,10 @@ "ref\01461239325-22779-2-git-send-email-pmladek@suse.com\0" "ref\020170419131341.76bc7634@gandalf.local.home\0" "ref\020170420033112.GB542@jagdpanzerIV.localdomain\0" - "From\0Petr Mladek <pmladek@suse.com>\0" - "Subject\0Re: [PATCH v5 1/4] printk/nmi: generic solution for safe printk in NMI\0" + "From\0pmladek@suse.com (Petr Mladek)\0" + "Subject\0[PATCH v5 1/4] printk/nmi: generic solution for safe printk in NMI\0" "Date\0Thu, 20 Apr 2017 15:11:54 +0200\0" - "To\0Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>\0" - "Cc\0Steven Rostedt <rostedt@goodmis.org>" - Andrew Morton <akpm@linux-foundation.org> - Peter Zijlstra <peterz@infradead.org> - Russell King <rmk+kernel@arm.linux.org.uk> - Daniel Thompson <daniel.thompson@linaro.org> - Jiri Kosina <jkosina@suse.com> - Ingo Molnar <mingo@redhat.com> - Thomas Gleixner <tglx@linutronix.de> - Chris Metcalf <cmetcalf@ezchip.com> - linux-kernel@vger.kernel.org - x86@kernel.org - linux-arm-kernel@lists.infradead.org - adi-buildroot-devel@lists.sourceforge.net - linux-cris-kernel@axis.com - linux-mips@linux-mips.org - linuxppc-dev@lists.ozlabs.org - linux-s390@vger.kernel.org - linux-sh@vger.kernel.org - sparclinux@vger.kernel.org - Jan Kara <jack@suse.cz> - Ralf Baechle <ralf@linux-mips.org> - Benjamin Herrenschmidt <benh@kernel.crashing.org> - Martin Schwidefsky <schwidefsky@de.ibm.com> - " David Miller <davem@davemloft.net>\0" + "To\0linux-arm-kernel@lists.infradead.org\0" "\00:1\0" "b\0" "On Thu 2017-04-20 12:31:12, Sergey Senozhatsky wrote:\n" @@ -113,6 +89,203 @@ "I have cooked up a patch based on this. It uses printk_deferred()\n" "in NMI when it is safe. Note that console_flush_on_panic() will\n" "try to get them on the console when a kdump is not generated.\n" - I believe that it will help Steven. + "I believe that it will help Steven.\n" + "\n" + "\n" + ">From c530d9dee91c74db5e6a198479e2e63b24cb84a2 Mon Sep 17 00:00:00 2001\n" + "From: Petr Mladek <pmladek@suse.com>\n" + "Date: Thu, 20 Apr 2017 10:52:31 +0200\n" + "Subject: [PATCH] printk: Use the main logbuf in NMI when logbuf_lock is\n" + " available\n" + "\n" + "The commit 42a0bb3f71383b457a7d (\"printk/nmi: generic solution for safe\n" + "printk in NMI\") caused that printk stores messages into a temporary\n" + "buffer in NMI context.\n" + "\n" + "The buffer is per-CPU and therefore the size is rather limited.\n" + "It works quite well for NMI backtraces. But there are longer logs\n" + "that might get printed in NMI context, for example, lockdep\n" + "warnings, ftrace_dump_on_oops.\n" + "\n" + "The temporary buffer is used to avoid deadlocks caused by\n" + "logbuf_lock. Also it is needed to avoid races with the other\n" + "temporary buffer that is used when PRINTK_SAFE_CONTEXT is entered.\n" + "But the main buffer can be used in NMI if the lock is available\n" + "and we did not interrupt PRINTK_SAFE_CONTEXT.\n" + "\n" + "The lock is checked using raw_spin_is_locked(). It might cause\n" + "false negatives when the lock is taken on another CPU outside NMI.\n" + "For this reason, we do the check in printk_nmi_enter(). It makes\n" + "the handling consistent for the entire NMI handler and avoids\n" + "reshuffling of the messages.\n" + "\n" + "The patch also defines special printk context that allows\n" + "to use printk_deferred() in NMI. Note that we could not flush\n" + "the messages to the consoles because console drivers might use\n" + "many other internal locks.\n" + "\n" + "The newly created vprintk_deferred() disables the preemption\n" + "only around the irq work handling. It is needed there to keep\n" + "the consistency between the two per-CPU variables. But there\n" + "is no reason to disable preemption around vprintk_emit().\n" + "\n" + "Finally, the patch patch puts back explicit serialization\n" + "of the NMI backtraces from different CPUs. It was removed\n" + "by the commit a9edc88093287183ac934b (\"x86/nmi: Perform\n" + "a safe NMI stack trace on all CPUs\"). It was not needed\n" + "because the flushing of the temporary per-CPU buffers\n" + "was serialized.\n" + "\n" + "Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>\n" + "Signed-off-by: Petr Mladek <pmladek@suse.com>\n" + "---\n" + " kernel/printk/internal.h | 6 ++++--\n" + " kernel/printk/printk.c | 19 ++++++++++++++-----\n" + " kernel/printk/printk_safe.c | 25 +++++++++++++++++++++++--\n" + " lib/nmi_backtrace.c | 3 +++\n" + " 4 files changed, 44 insertions(+), 9 deletions(-)\n" + "\n" + "diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h\n" + "index 1db044f808b7..2a7d04049af4 100644\n" + "--- a/kernel/printk/internal.h\n" + "+++ b/kernel/printk/internal.h\n" + "@@ -18,12 +18,14 @@\n" + " \n" + " #ifdef CONFIG_PRINTK\n" + " \n" + "-#define PRINTK_SAFE_CONTEXT_MASK\t0x7fffffff\n" + "-#define PRINTK_NMI_CONTEXT_MASK\t0x80000000\n" + "+#define PRINTK_SAFE_CONTEXT_MASK\t 0x3fffffff\n" + "+#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000\n" + "+#define PRINTK_NMI_CONTEXT_MASK\t\t 0x80000000\n" + " \n" + " extern raw_spinlock_t logbuf_lock;\n" + " \n" + " __printf(1, 0) int vprintk_default(const char *fmt, va_list args);\n" + "+__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);\n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args);\n" + " void __printk_safe_enter(void);\n" + " void __printk_safe_exit(void);\n" + "diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c\n" + "index 2984fb0f0257..16b519927d35 100644\n" + "--- a/kernel/printk/printk.c\n" + "+++ b/kernel/printk/printk.c\n" + "@@ -2691,16 +2691,13 @@ void wake_up_klogd(void)\n" + " \tpreempt_enable();\n" + " }\n" + " \n" + "-int printk_deferred(const char *fmt, ...)\n" + "+int vprintk_deferred(const char *fmt, va_list args)\n" + " {\n" + "-\tva_list args;\n" + " \tint r;\n" + " \n" + "-\tpreempt_disable();\n" + "-\tva_start(args, fmt);\n" + " \tr = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);\n" + "-\tva_end(args);\n" + " \n" + "+\tpreempt_disable();\n" + " \t__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);\n" + " \tirq_work_queue(this_cpu_ptr(&wake_up_klogd_work));\n" + " \tpreempt_enable();\n" + "@@ -2708,6 +2705,18 @@ int printk_deferred(const char *fmt, ...)\n" + " \treturn r;\n" + " }\n" + " \n" + "+int printk_deferred(const char *fmt, ...)\n" + "+{\n" + "+\tva_list args;\n" + "+\tint r;\n" + "+\n" + "+\tva_start(args, fmt);\n" + "+\tr = vprintk_deferred(fmt, args);\n" + "+\tva_end(args);\n" + "+\n" + "+\treturn r;\n" + "+}\n" + "+\n" + " /*\n" + " * printk rate limiting, lifted from the networking subsystem.\n" + " *\n" + "diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c\n" + "index 033e50a7d706..c3d165bcde42 100644\n" + "--- a/kernel/printk/printk_safe.c\n" + "+++ b/kernel/printk/printk_safe.c\n" + "@@ -308,12 +308,23 @@ static int vprintk_nmi(const char *fmt, va_list args)\n" + " \n" + " void printk_nmi_enter(void)\n" + " {\n" + "-\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t/*\n" + "+\t * The size of the extra per-CPU buffer is limited. Use it\n" + "+\t * only when really needed.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK ||\n" + "+\t raw_spin_is_locked(&logbuf_lock)) {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);\n" + "+\t} else {\n" + "+\t\tthis_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);\n" + "+\t}\n" + " }\n" + " \n" + " void printk_nmi_exit(void)\n" + " {\n" + "-\tthis_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);\n" + "+\tthis_cpu_and(printk_context,\n" + "+\t\t ~(PRINTK_NMI_CONTEXT_MASK ||\n" + "+\t\t PRINTK_NMI_DEFERRED_CONTEXT_MASK));\n" + " }\n" + " \n" + " #else\n" + "@@ -351,12 +362,22 @@ void __printk_safe_exit(void)\n" + " \n" + " __printf(1, 0) int vprintk_func(const char *fmt, va_list args)\n" + " {\n" + "+\t/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)\n" + " \t\treturn vprintk_nmi(fmt, args);\n" + " \n" + "+\t/* Use extra buffer to prevent a recursion deadlock in safe mode. */\n" + " \tif (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)\n" + " \t\treturn vprintk_safe(fmt, args);\n" + " \n" + "+\t/*\n" + "+\t * Use the main logbuf when logbuf_lock is available in NMI.\n" + "+\t * But avoid calling console drivers that might have their own locks.\n" + "+\t */\n" + "+\tif (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)\n" + "+\t\treturn vprintk_deferred(fmt, args);\n" + "+\n" + "+\t/* No obstacles. */\n" + " \treturn vprintk_default(fmt, args);\n" + " }\n" + " \n" + "diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c\n" + "index 4e8a30d1c22f..0bc0a3535a8a 100644\n" + "--- a/lib/nmi_backtrace.c\n" + "+++ b/lib/nmi_backtrace.c\n" + "@@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,\n" + " \n" + " bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " {\n" + "+\tstatic arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;\n" + " \tint cpu = smp_processor_id();\n" + " \n" + " \tif (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {\n" + "+\t\tarch_spin_lock(&lock);\n" + " \t\tif (regs && cpu_in_idle(instruction_pointer(regs))) {\n" + " \t\t\tpr_warn(\"NMI backtrace for cpu %d skipped: idling at pc %#lx\\n\",\n" + " \t\t\t\tcpu, instruction_pointer(regs));\n" + "@@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)\n" + " \t\t\telse\n" + " \t\t\t\tdump_stack();\n" + " \t\t}\n" + "+\t\tarch_spin_unlock(&lock);\n" + " \t\tcpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));\n" + " \t\treturn true;\n" + " \t}\n" + "-- \n" + 1.8.5.6 -22062e0cf824608ca96c0b3e1448fae72938b7dfc4191e4eb21bfd86e06df865 +e4ad4a90d7d8c1067ae1e98cc96fe783c40725617b1fd5c81a6c52f6f9a0d119
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.