From: Peter Zijlstra <peterz@infradead.org>
To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
linux-kernel@vger.kernel.org, Ingo Molnar <mingo@kernel.org>,
Lai Jiangshan <jiangshanlai@gmail.com>,
dipankar@in.ibm.com, Andrew Morton <akpm@linux-foundation.org>,
josh@joshtriplett.org, Thomas Gleixner <tglx@linutronix.de>,
rostedt <rostedt@goodmis.org>,
dhowells@redhat.com, edumazet@google.com, dvhart@linux.intel.com,
fweisbec@gmail.com, oleg@redhat.com,
bobby prani <bobby.prani@gmail.com>
Subject: Re: [PATCH tip/core/rcu 02/18] rcu: Move rcu_report_exp_rnp() to allow consolidation
Date: Wed, 7 Oct 2015 13:50:46 +0200 [thread overview]
Message-ID: <20151007115046.GK11639@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <20151007110120.GE17308@twins.programming.kicks-ass.net>
On Wed, Oct 07, 2015 at 01:01:20PM +0200, Peter Zijlstra wrote:
> That again doesn't explain which UNLOCKs with non-matching lock values
> it pairs with and what particular ordering is important here.
So after staring at that stuff for a while I came up with the following.
Does this make sense, or am I completely misunderstanding things?
Not been near a compiler.
---
kernel/rcu/tree.c | 99 ++++++++++++++++++++++++++++++++++---------------------
1 file changed, 61 insertions(+), 38 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 775d36cc0050..46e1e23ff762 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1460,6 +1460,48 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
/*
+ * Wrappers for the rcu_node::lock acquire.
+ *
+ * Because the rcu_nodes form a tree, the tree taversal locking will observe
+ * different lock values, this in turn means that an UNLOCK of one level
+ * followed by a LOCK of another level does not imply a full memory barrier;
+ * and most importantly transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ */
+static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock_irq(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+static inline void
+_raw_spin_lock_irqsave_rcu_node(struct rcu_node *rnp, unsigned long *flags)
+{
+ _raw_spin_lock_irqsave(&rnp->lock, flags);
+ smp_mb__after_unlock_lock();
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags)
+ _raw_spin_lock_irqsave_rcu_node((rnp), &(flags))
+
+static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
+{
+ bool locked = raw_spin_trylock(&rnp->lock);
+ if (locked)
+ smp_mb__after_unlock_lock();
+ return locked;
+}
+
+
+/*
* Start some future grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
* rcu_node structure's ->need_future_gp field. Returns true if there
@@ -1512,10 +1554,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* hold it, acquire the root rcu_node structure's lock in order to
* start one (if needed).
*/
- if (rnp != rnp_root) {
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
- }
+ if (rnp != rnp_root)
+ raw_spin_lock_rcu_node(rnp);
/*
* Get a new grace-period number. If there really is no grace
@@ -1764,11 +1804,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
rdp->completed == READ_ONCE(rnp->completed) &&
!unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
- !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+ !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
- smp_mb__after_unlock_lock();
needwake = __note_gp_changes(rsp, rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
@@ -1792,8 +1831,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (!READ_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
raw_spin_unlock_irq(&rnp->lock);
@@ -1825,8 +1863,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_leaf_node(rsp, rnp) {
rcu_gp_slow(rsp, gp_preinit_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
@@ -1882,8 +1919,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
rcu_gp_slow(rsp, gp_init_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
@@ -1953,8 +1989,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
}
/* Clear flag to prevent immediate re-entry. */
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WRITE_ONCE(rsp->gp_flags,
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
@@ -1974,8 +2009,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
@@ -2000,8 +2034,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
* grace period is recorded in any of the rcu_node structures.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->completed, rsp->gpnum);
@@ -2016,8 +2049,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
+ raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
@@ -2264,8 +2296,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
oldmask = rnp_c->qsmask;
}
@@ -2312,8 +2343,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
gps = rnp->gpnum;
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
}
@@ -2335,8 +2365,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
struct rcu_node *rnp;
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rdp->passed_quiesce == 0 &&
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
@@ -2562,8 +2591,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (!rnp)
break;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock(); /* GP memory ordering. */
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
rnp->qsmask &= ~mask;
if (rnp->qsmaskinit) {
@@ -2591,8 +2619,7 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rnp->qsmaskinitnext &= ~mask;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -2789,8 +2816,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
rcu_for_each_leaf_node(rsp, rnp) {
cond_resched_rcu_qs();
mask = 0;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
if (rcu_state_p == &rcu_sched_state ||
rsp != rcu_state_p ||
@@ -2861,8 +2887,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
/* Reached the root of the rcu_node tree, acquire lock. */
- raw_spin_lock_irqsave(&rnp_old->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
@@ -2985,8 +3010,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
if (!rcu_gp_in_progress(rsp)) {
struct rcu_node *rnp_root = rcu_get_root(rsp);
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_root);
needwake = rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
if (needwake)
@@ -3925,8 +3949,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
*/
rnp = rdp->mynode;
mask = rdp->grpmask;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinitnext |= mask;
rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
rdp->completed = rnp->completed;
next prev parent reply other threads:[~2015-10-07 11:50 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-06 16:29 [PATCH tip/core/rcu 0/18] Expedited grace-period improvements for 4.4 Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 01/18] rcu: Use rsp->expedited_wq instead of sync_rcu_preempt_exp_wq Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 02/18] rcu: Move rcu_report_exp_rnp() to allow consolidation Paul E. McKenney
2015-10-06 20:29 ` Peter Zijlstra
2015-10-06 20:58 ` Paul E. McKenney
2015-10-07 7:51 ` Peter Zijlstra
2015-10-07 8:42 ` Mathieu Desnoyers
2015-10-07 11:01 ` Peter Zijlstra
2015-10-07 11:50 ` Peter Zijlstra [this message]
2015-10-07 12:03 ` Peter Zijlstra
2015-10-07 12:05 ` kbuild test robot
2015-10-07 12:09 ` kbuild test robot
2015-10-07 12:11 ` kbuild test robot
2015-10-07 12:17 ` Peter Zijlstra
2015-10-07 13:44 ` [kbuild-all] " Fengguang Wu
2015-10-07 13:55 ` Peter Zijlstra
2015-10-07 14:21 ` Fengguang Wu
2015-10-07 14:28 ` Peter Zijlstra
2015-10-07 15:18 ` Paul E. McKenney
2015-10-08 10:24 ` Peter Zijlstra
2015-10-07 15:15 ` Paul E. McKenney
2015-10-07 14:33 ` Paul E. McKenney
2015-10-07 14:40 ` Peter Zijlstra
2015-10-07 16:48 ` Paul E. McKenney
2015-10-08 9:49 ` Peter Zijlstra
2015-10-08 15:33 ` Paul E. McKenney
2015-10-08 17:12 ` Peter Zijlstra
2015-10-08 17:46 ` Paul E. McKenney
2015-10-09 0:10 ` Paul E. McKenney
2015-10-09 8:44 ` Peter Zijlstra
2015-10-06 16:29 ` [PATCH tip/core/rcu 03/18] rcu: Consolidate tree setup for synchronize_rcu_expedited() Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 04/18] rcu: Use single-stage IPI algorithm for RCU expedited grace period Paul E. McKenney
2015-10-07 13:24 ` Peter Zijlstra
2015-10-07 18:11 ` Paul E. McKenney
2015-10-07 13:35 ` Peter Zijlstra
2015-10-07 15:44 ` Paul E. McKenney
2015-10-07 13:43 ` Peter Zijlstra
2015-10-07 13:49 ` Peter Zijlstra
2015-10-07 16:14 ` Paul E. McKenney
2015-10-08 9:00 ` Peter Zijlstra
2015-10-07 16:13 ` Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 05/18] rcu: Move synchronize_sched_expedited() to combining tree Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 06/18] rcu: Rename qs_pending to core_needs_qs Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 07/18] rcu: Invert passed_quiesce and rename to cpu_no_qs Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 08/18] rcu: Make ->cpu_no_qs be a union for aggregate OR Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 09/18] rcu: Switch synchronize_sched_expedited() to IPI Paul E. McKenney
2015-10-07 14:18 ` Peter Zijlstra
2015-10-07 16:24 ` Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 10/18] rcu: Stop silencing lockdep false positive for expedited grace periods Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 11/18] rcu: Stop excluding CPU hotplug in synchronize_sched_expedited() Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 12/18] cpu: Remove try_get_online_cpus() Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 13/18] rcu: Prepare for consolidating expedited CPU selection Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 14/18] rcu: Consolidate " Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 15/18] rcu: Add online/offline info to expedited stall warning message Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 16/18] rcu: Add tasks to expedited stall-warning messages Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 17/18] rcu: Enable stall warnings for synchronize_rcu_expedited() Paul E. McKenney
2015-10-06 16:29 ` [PATCH tip/core/rcu 18/18] rcu: Better hotplug handling for synchronize_sched_expedited() Paul E. McKenney
2015-10-07 14:26 ` Peter Zijlstra
2015-10-07 16:26 ` Paul E. McKenney
2015-10-08 9:01 ` Peter Zijlstra
2015-10-08 15:06 ` Paul E. McKenney
2015-10-08 15:12 ` Peter Zijlstra
2015-10-08 15:19 ` Paul E. McKenney
2015-10-08 18:01 ` Josh Triplett
2015-10-09 0:11 ` Paul E. McKenney
2015-10-09 0:48 ` Josh Triplett
2015-10-09 3:54 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151007115046.GK11639@twins.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=bobby.prani@gmail.com \
--cc=dhowells@redhat.com \
--cc=dipankar@in.ibm.com \
--cc=dvhart@linux.intel.com \
--cc=edumazet@google.com \
--cc=fweisbec@gmail.com \
--cc=jiangshanlai@gmail.com \
--cc=josh@joshtriplett.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.