From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Paul McKenney <paulmck@kernel.org>,
Josh Poimboeuf <jpoimboe@redhat.com>,
"Joel Fernandes (Google)" <joel@joelfernandes.org>,
"Steven Rostedt (VMware)" <rostedt@goodmis.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Frederic Weisbecker <frederic@kernel.org>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Brian Gerst <brgerst@gmail.com>, Juergen Gross <jgross@suse.com>,
Alexandre Chartre <alexandre.chartre@oracle.com>,
Peter Zijlstra <peterz@infradead.org>,
Tom Lendacky <thomas.lendacky@amd.com>,
Paolo Bonzini <pbonzini@redhat.com>,
kvm@vger.kernel.org
Subject: [RESEND][patch V3 07/23] lockdep: Prepare for noinstr sections
Date: Fri, 20 Mar 2020 19:00:03 +0100 [thread overview]
Message-ID: <20200320180033.092520097@linutronix.de> (raw)
In-Reply-To: 20200320175956.033706968@linutronix.de
From: Peter Zijlstra <peterz@infradead.org>
Lockdep is invoked after RCU stopped watching or before it restarted
watching from the low level entry/exit code.
lockdep_hardirqs_on() is part of the irq-state tracking; it is the
callback that indicates we're about to enable IRQs. But because of
this, lockdep has co-opted this callback to do lock state updates. All
(still) held locks will get marked with ENABLED_HARDIRQ, which then
also looks for cycles connecting to USED_IN_HARDIRQ for IRQ recursion
deadlocks.
This results in quite a lot of lockdep code being run, but worse, it
will want to do stack-traces for the lock state changes. Stack traces
require RCU.
Because code that requires RCU must not run after we've shut down RCU,
and shutting down RCU itself requires locks in some circumstances,
split this into two parts:
- lockdep_hardirqs_on_prepare() -- updates the held lock state
- lockdep_hardirqs_on() -- does the irq state tracking
This allows running the lock state changes and stack-traces with RCU
enabled, while doing the IRQ state change later. Of course, this
opens a window where the lock stack can change. Therefore
lockdep_hardirqs_on_prepare() will snapshot the chain_key and
lockdep_hardirqs_on() will validate it still matches. This ensures
that, even when interleaved code uses locks, the actual lock state
didn't change between these two calls.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/irqflags.h | 2 +
include/linux/sched.h | 1
kernel/locking/lockdep.c | 66 +++++++++++++++++++++++++++++-----------
kernel/trace/trace_preemptirq.c | 2 +
lib/debug_locks.c | 2 -
5 files changed, 55 insertions(+), 18 deletions(-)
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,11 +19,13 @@
#ifdef CONFIG_PROVE_LOCKING
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
+ extern void lockdep_hardirqs_on_prepare(unsigned long ip);
extern void lockdep_hardirqs_on(unsigned long ip);
extern void lockdep_hardirqs_off(unsigned long ip);
#else
static inline void trace_softirqs_on(unsigned long ip) { }
static inline void trace_softirqs_off(unsigned long ip) { }
+ static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
static inline void lockdep_hardirqs_on(unsigned long ip) { }
static inline void lockdep_hardirqs_off(unsigned long ip) { }
#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -976,6 +976,7 @@ struct task_struct {
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
+ u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3370,9 +3370,6 @@ static void __trace_hardirqs_on_caller(u
{
struct task_struct *curr = current;
- /* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
-
/*
* We are going to turn hardirqs on, so set the
* usage bit for all held locks:
@@ -3384,16 +3381,13 @@ static void __trace_hardirqs_on_caller(u
* bit for all held locks. (disabled hardirqs prevented
* this bit from being set before)
*/
- if (curr->softirqs_enabled)
+ if (curr->softirqs_enabled) {
if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ))
return;
-
- curr->hardirq_enable_ip = ip;
- curr->hardirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(hardirqs_on_events);
+ }
}
-void lockdep_hardirqs_on(unsigned long ip)
+void lockdep_hardirqs_on_prepare(unsigned long ip)
{
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -3429,20 +3423,59 @@ void lockdep_hardirqs_on(unsigned long i
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
return;
+ current->hardirq_chain_key = current->curr_chain_key;
+
current->lockdep_recursion = 1;
__trace_hardirqs_on_caller(ip);
current->lockdep_recursion = 0;
}
-NOKPROBE_SYMBOL(lockdep_hardirqs_on);
+void noinstr lockdep_hardirqs_on(unsigned long ip)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(!debug_locks || curr->lockdep_recursion))
+ return;
+
+ if (curr->hardirqs_enabled) {
+ /*
+ * Neither irq nor preemption are disabled here
+ * so this is racy by nature but losing one hit
+ * in a stat is not a big deal.
+ */
+ __debug_atomic_inc(redundant_hardirqs_on);
+ return;
+ }
+
+ /*
+ * We're enabling irqs and according to our state above irqs weren't
+ * already enabled, yet we find the hardware thinks they are in fact
+ * enabled.. someone messed up their IRQ state tracing.
+ */
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+
+ /*
+ * Ensure the lock stack remained unchanged between
+ * lockdep_hardirqs_on_prepare() and lockdep_hardirqs_on().
+ */
+ DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
+ current->curr_chain_key);
+
+ /* we'll do an OFF -> ON transition: */
+ curr->hardirqs_enabled = 1;
+ curr->hardirq_enable_ip = ip;
+ curr->hardirq_enable_event = ++curr->irq_events;
+ debug_atomic_inc(hardirqs_on_events);
+}
/*
* Hardirqs were disabled:
*/
-void lockdep_hardirqs_off(unsigned long ip)
+void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
/*
@@ -3463,7 +3496,6 @@ void lockdep_hardirqs_off(unsigned long
} else
debug_atomic_inc(redundant_hardirqs_off);
}
-NOKPROBE_SYMBOL(lockdep_hardirqs_off);
/*
* Softirqs will be enabled:
@@ -4007,8 +4039,8 @@ static void print_unlock_imbalance_bug(s
dump_stack();
}
-static int match_held_lock(const struct held_lock *hlock,
- const struct lockdep_map *lock)
+static noinstr int match_held_lock(const struct held_lock *hlock,
+ const struct lockdep_map *lock)
{
if (hlock->instance == lock)
return 1;
@@ -4293,7 +4325,7 @@ static int
return 0;
}
-static nokprobe_inline
+static __always_inline
int __lock_is_held(const struct lockdep_map *lock, int read)
{
struct task_struct *curr = current;
@@ -4506,7 +4538,7 @@ void lock_release(struct lockdep_map *lo
}
EXPORT_SYMBOL_GPL(lock_release);
-int lock_is_held_type(const struct lockdep_map *lock, int read)
+noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
{
unsigned long flags;
int ret = 0;
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -39,6 +39,7 @@ void trace_hardirqs_on(void)
this_cpu_write(tracing_irq_cpu, 0);
}
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
@@ -79,6 +80,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_off);
this_cpu_write(tracing_irq_cpu, 0);
}
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
/*
* Generic 'turn off all lock debugging' function:
*/
-int debug_locks_off(void)
+noinstr int debug_locks_off(void)
{
if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
next prev parent reply other threads:[~2020-03-20 22:05 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-20 17:59 [RESEND][patch V3 00/23] x86/entry: Consolidation part II (syscalls) Thomas Gleixner
2020-03-20 17:59 ` [RESEND][patch V3 01/23] rcu: Dont acquire lock in NMI handler in rcu_nmi_enter_common() Thomas Gleixner
2020-03-24 15:37 ` [patch " Frederic Weisbecker
2020-03-20 17:59 ` [RESEND][patch V3 02/23] rcu: Add comments marking transitions between RCU watching and not Thomas Gleixner
2020-03-24 15:38 ` [patch " Frederic Weisbecker
2020-03-20 17:59 ` [RESEND][patch V3 03/23] vmlinux.lds.h: Create section for protection against instrumentation Thomas Gleixner
2020-03-24 12:26 ` Borislav Petkov
2020-04-03 8:08 ` Alexandre Chartre
2020-03-20 18:00 ` [RESEND][patch V3 04/23] kprobes: Prevent probes in .noinstr.text section Thomas Gleixner
2020-03-23 14:00 ` [patch " Masami Hiramatsu
2020-03-23 16:03 ` Thomas Gleixner
2020-03-24 5:49 ` Masami Hiramatsu
2020-03-24 9:47 ` Thomas Gleixner
2020-03-25 13:39 ` Masami Hiramatsu
2020-03-20 18:00 ` [RESEND][patch V3 05/23] tracing: Provide lockdep less trace_hardirqs_on/off() variants Thomas Gleixner
2020-04-03 8:34 ` Alexandre Chartre
2020-03-20 18:00 ` [RESEND][patch V3 06/23] bug: Annotate WARN/BUG/stackfail as noinstr safe Thomas Gleixner
2020-04-02 21:01 ` Josh Poimboeuf
2020-04-02 21:34 ` Peter Zijlstra
2020-04-02 21:43 ` Josh Poimboeuf
2020-04-02 21:49 ` Thomas Gleixner
2020-03-20 18:00 ` Thomas Gleixner [this message]
2020-03-20 18:00 ` [RESEND][patch V3 08/23] x86/entry: Mark enter_from_user_mode() noinstr Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 09/23] x86/entry/common: Protect against instrumentation Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 10/23] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 11/23] x86/entry: Move irq flags tracing to prepare_exit_to_usermode() Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 12/23] context_tracking: Ensure that the critical path cannot be instrumented Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 13/23] lib/smp_processor_id: Move it into noinstr section Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 14/23] x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 15/23] x86/entry/64: Check IF in __preempt_enable_notrace() thunk Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 16/23] x86/entry/64: Mark ___preempt_schedule_notrace() thunk noinstr Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 17/23] rcu/tree: Mark the idle relevant functions noinstr Thomas Gleixner
2020-03-24 16:09 ` Paul E. McKenney
2020-03-24 19:28 ` Thomas Gleixner
2020-03-24 19:58 ` Paul E. McKenney
2020-03-20 18:00 ` [RESEND][patch V3 18/23] x86/kvm: Move context tracking where it belongs Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 19/23] x86/kvm/vmx: Add hardirq tracing to guest enter/exit Thomas Gleixner
2020-03-24 23:03 ` Peter Zijlstra
2020-03-24 23:21 ` Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 20/23] x86/kvm/svm: Handle hardirqs proper on " Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 21/23] context_tracking: Make guest_enter/exit_irqoff() .noinstr ready Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 22/23] x86/kvm/vmx: Move guest enter/exit into .noinstr.text Thomas Gleixner
2020-03-20 18:00 ` [RESEND][patch V3 23/23] x86/kvm/svm: " Thomas Gleixner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200320180033.092520097@linutronix.de \
--to=tglx@linutronix.de \
--cc=alexandre.chartre@oracle.com \
--cc=ast@kernel.org \
--cc=brgerst@gmail.com \
--cc=frederic@kernel.org \
--cc=jgross@suse.com \
--cc=joel@joelfernandes.org \
--cc=jpoimboe@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=thomas.lendacky@amd.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).