* [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Paul E. McKenney @ 2017-02-09 23:51 UTC
To: linux-kernel, linux-mm; +Cc: luto, tglx, fweisbec, cmetcalf, peterz, mingo
Currently, IPIs are used to force other CPUs to invalidate their TLBs
in response to a kernel virtual-memory mapping change. This works, but
degrades both battery lifetime (for idle CPUs) and real-time response
(for nohz_full CPUs), and in addition results in unnecessary IPIs due to
the fact that CPUs executing in usermode are unaffected by stale kernel
mappings. It would be better to cause a CPU executing in usermode to
wait until it is entering kernel mode to do the flush, first to avoid
interrupting usermode tasks and second to handle multiple flush requests
with a single flush in the case of a long-running user task.
This commit therefore reserves a bit at the bottom of the ->dynticks
counter, which is checked upon exit from extended quiescent states.
If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is
invoked, which, if not supplied, is an empty single-pass do-while loop.
If this bottom bit is set on -entry- to an extended quiescent state,
then a WARN_ON_ONCE() triggers.
This bottom bit may be set using a new rcu_eqs_special_set() function,
which returns true if the bit was set, or false if the CPU turned
out to not be in an extended quiescent state. Please note that this
function refuses to set the bit for a non-nohz_full CPU when that CPU
is executing in usermode because usermode execution is tracked by RCU
as a dyntick-idle extended quiescent state only for nohz_full CPUs.
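For illustration, a hypothetical consumer of these hooks might be wired
up roughly as follows. This sketch is not part of the patch, and the
names flush_deferred_kernel_tlb(), do_flush_tlb_kernel(), and
kernel_tlb_shootdown() are invented for this example:

	/* Hypothetical arch glue: run the deferred flush on EQS exit. */
	#define rcu_eqs_special_exit() flush_deferred_kernel_tlb()

	/* Hypothetical arch flush handler, run by IPI on non-EQS CPUs. */
	static void do_flush_tlb_kernel(void *unused);

	static void kernel_tlb_shootdown(const struct cpumask *cpus,
					 struct cpumask *to_ipi)
	{
		int cpu;

		cpumask_clear(to_ipi);
		for_each_cpu(cpu, cpus) {
			/* CPU in EQS: bit set, flush deferred to EQS exit. */
			if (rcu_eqs_special_set(cpu))
				continue;
			/* CPU in the kernel: cannot defer, send an IPI. */
			cpumask_set_cpu(cpu, to_ipi);
		}
		smp_call_function_many(to_ipi, do_flush_tlb_kernel, NULL, 1);
	}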
Changes since v1: Fix ordering of atomic_and() and the call to
rcu_eqs_special_exit() in rcu_dynticks_eqs_exit().
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4f9b2fa2173d..7232d199a81c 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
+static inline bool rcu_eqs_special_set(int cpu)
+{
+	return false; /* Never flag non-existent other CPUs! */
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d80e0d2f68c6..a473c9b8987d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -272,9 +272,19 @@ void rcu_bh_qs(void)
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
 
+/*
+ * Steal a bit from the bottom of ->dynticks for idle entry/exit
+ * control. Initially this is for TLB flushing.
+ */
+#define RCU_DYNTICK_CTRL_MASK 0x1
+#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
+#ifndef rcu_eqs_special_exit
+#define rcu_eqs_special_exit() do { } while (0)
+#endif
+
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
+	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
 	.dynticks_idle = ATOMIC_INIT(1),
@@ -288,15 +298,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 static void rcu_dynticks_eqs_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
 	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
 	 * critical sections, and we also must force ordering with the
 	 * next idle sojourn.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	/* Better be in an extended quiescent state! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_CTR));
+	/* Better not have special action (TLB flush) pending! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_MASK));
 }
 
 /*
@@ -306,15 +321,22 @@ static void rcu_dynticks_eqs_enter(void)
 static void rcu_dynticks_eqs_exit(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
 	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
 	 * and we also must force ordering with the next RCU read-side
 	 * critical section.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     !(seq & RCU_DYNTICK_CTRL_CTR));
+	if (seq & RCU_DYNTICK_CTRL_MASK) {
+		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
+		smp_mb__after_atomic(); /* _exit after clearing mask. */
+		/* Prefer duplicate flushes to losing a flush. */
+		rcu_eqs_special_exit();
+	}
 }
 
 /*
@@ -331,9 +353,9 @@ static void rcu_dynticks_eqs_online(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	if (atomic_read(&rdtp->dynticks) & 0x1)
+	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
 		return;
-	atomic_add(0x1, &rdtp->dynticks);
+	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 }
 
 /*
@@ -345,7 +367,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	return !(atomic_read(&rdtp->dynticks) & 0x1);
+	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -356,7 +378,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 {
 	int snap = atomic_add_return(0, &rdtp->dynticks);
 
-	return snap;
+	return snap & ~RCU_DYNTICK_CTRL_MASK;
 }
 
 /*
@@ -365,7 +387,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & 0x1);
+	return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -385,10 +407,33 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
 static void rcu_dynticks_momentary_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special = atomic_add_return(2, &rdtp->dynticks);
+	int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
+					&rdtp->dynticks);
 
 	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & 0x1));
+	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+}
+
+/*
+ * Set the special (bottom) bit of the specified CPU so that it
+ * will take special action (such as flushing its TLB) on the
+ * next exit from an extended quiescent state. Returns true if
+ * the bit was successfully set, or false if the CPU was not in
+ * an extended quiescent state.
+ */
+bool rcu_eqs_special_set(int cpu)
+{
+	int old;
+	int new;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	do {
+		old = atomic_read(&rdtp->dynticks);
+		if (old & RCU_DYNTICK_CTRL_CTR)
+			return false;
+		new = old | RCU_DYNTICK_CTRL_MASK;
+	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
+	return true;
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b60f2b6caa14..5a2ad582b4b6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -595,6 +595,7 @@ extern struct rcu_state rcu_preempt_state;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
+bool rcu_eqs_special_set(int cpu);
 
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Peter Zijlstra @ 2017-02-13 12:21 UTC
To: Paul E. McKenney
Cc: linux-kernel, linux-mm, luto, tglx, fweisbec, cmetcalf, mingo
On Thu, Feb 09, 2017 at 03:51:03PM -0800, Paul E. McKenney wrote:
> Currently, IPIs are used to force other CPUs to invalidate their TLBs
> in response to a kernel virtual-memory mapping change. This works, but
> degrades both battery lifetime (for idle CPUs) and real-time response
> (for nohz_full CPUs), and in addition results in unnecessary IPIs due to
> the fact that CPUs executing in usermode are unaffected by stale kernel
> mappings. It would be better to cause a CPU executing in usermode to
> wait until it is entering kernel mode to do the flush, first to avoid
> interrupting usermode tasks and second to handle multiple flush requests
> with a single flush in the case of a long-running user task.
>
> This commit therefore reserves a bit at the bottom of the ->dynticks
> counter, which is checked upon exit from extended quiescent states.
> If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is
> invoked, which, if not supplied, is an empty single-pass do-while loop.
> If this bottom bit is set on -entry- to an extended quiescent state,
> then a WARN_ON_ONCE() triggers.
>
> This bottom bit may be set using a new rcu_eqs_special_set() function,
> which returns true if the bit was set, or false if the CPU turned
> out to not be in an extended quiescent state. Please note that this
> function refuses to set the bit for a non-nohz_full CPU when that CPU
> is executing in usermode because usermode execution is tracked by RCU
> as a dyntick-idle extended quiescent state only for nohz_full CPUs.
>
> Changes since v1: Fix ordering of atomic_and() and the call to
> rcu_eqs_special_exit() in rcu_dynticks_eqs_exit().
>
> Reported-by: Andy Lutomirski <luto@amacapital.net>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
I think I've asked this before, but why does this live in the guts of
RCU?
Should we lift this state tracking stuff out and make RCU and
NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
In any case, small nit below:
> +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> +		     !(seq & RCU_DYNTICK_CTRL_CTR));
> +	if (seq & RCU_DYNTICK_CTRL_MASK) {
> +		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> +		smp_mb__after_atomic(); /* _exit after clearing mask. */
> +		/* Prefer duplicate flushes to losing a flush. */
> +		rcu_eqs_special_exit();
> +	}
we have atomic_andnot() for just these occasions :-)
> +/*
> + * Set the special (bottom) bit of the specified CPU so that it
> + * will take special action (such as flushing its TLB) on the
> + * next exit from an extended quiescent state. Returns true if
> + * the bit was successfully set, or false if the CPU was not in
> + * an extended quiescent state.
> + */
> +bool rcu_eqs_special_set(int cpu)
> +{
> +	int old;
> +	int new;
> +	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> +
> +	do {
> +		old = atomic_read(&rdtp->dynticks);
> +		if (old & RCU_DYNTICK_CTRL_CTR)
> +			return false;
> +		new = old | RCU_DYNTICK_CTRL_MASK;
> +	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> +	return true;
Is that what we call atomic_fetch_or() ?
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Paul E. McKenney @ 2017-02-13 17:01 UTC
To: Peter Zijlstra
Cc: linux-kernel, linux-mm, luto, tglx, fweisbec, cmetcalf, mingo
On Mon, Feb 13, 2017 at 01:21:15PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 09, 2017 at 03:51:03PM -0800, Paul E. McKenney wrote:
> > Currently, IPIs are used to force other CPUs to invalidate their TLBs
> > in response to a kernel virtual-memory mapping change. This works, but
> > degrades both battery lifetime (for idle CPUs) and real-time response
> > (for nohz_full CPUs), and in addition results in unnecessary IPIs due to
> > the fact that CPUs executing in usermode are unaffected by stale kernel
> > mappings. It would be better to cause a CPU executing in usermode to
> > wait until it is entering kernel mode to do the flush, first to avoid
> > interrupting usermode tasks and second to handle multiple flush requests
> > with a single flush in the case of a long-running user task.
> >
> > This commit therefore reserves a bit at the bottom of the ->dynticks
> > counter, which is checked upon exit from extended quiescent states.
> > If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is
> > invoked, which, if not supplied, is an empty single-pass do-while loop.
> > If this bottom bit is set on -entry- to an extended quiescent state,
> > then a WARN_ON_ONCE() triggers.
> >
> > This bottom bit may be set using a new rcu_eqs_special_set() function,
> > which returns true if the bit was set, or false if the CPU turned
> > out to not be in an extended quiescent state. Please note that this
> > function refuses to set the bit for a non-nohz_full CPU when that CPU
> > is executing in usermode because usermode execution is tracked by RCU
> > as a dyntick-idle extended quiescent state only for nohz_full CPUs.
> >
> > Changes since v1: Fix ordering of atomic_and() and the call to
> > rcu_eqs_special_exit() in rcu_dynticks_eqs_exit().
> >
> > Reported-by: Andy Lutomirski <luto@amacapital.net>
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>
> I think I've asked this before, but why does this live in the guts of
> RCU?
>
> Should we lift this state tracking stuff out and make RCU and
> NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
The dyntick-idle stuff is pretty specific to RCU. And what precisely
would be helped by moving it?
But that was an excellent question, as it reminded me of RCU's
dyntick-idle's NMI handling, and I never did ask Andy if it was OK for
rcu_eqs_special_exit() to be invoked when exiting NMI handler, which would
currently happen. It would be easy for me to pass in a flag indicating
whether or not the call is in NMI context, if that is needed.
It is of course not possible to detect this at rcu_eqs_special_set()
time, because rcu_eqs_special_set() has no way of knowing that the next
event that pulls the remote CPU out of idle will be an NMI.
> In any case, small nit below:
>
>
> > +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > +		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > +	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > +		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> > +		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > +		/* Prefer duplicate flushes to losing a flush. */
> > +		rcu_eqs_special_exit();
> > +	}
>
> we have atomic_andnot() for just these occasions :-)
I suppose that that could generate more efficient code on some
architectures. I have changed this.
> > +/*
> > + * Set the special (bottom) bit of the specified CPU so that it
> > + * will take special action (such as flushing its TLB) on the
> > + * next exit from an extended quiescent state. Returns true if
> > + * the bit was successfully set, or false if the CPU was not in
> > + * an extended quiescent state.
> > + */
> > +bool rcu_eqs_special_set(int cpu)
> > +{
> > +	int old;
> > +	int new;
> > +	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> > +
> > +	do {
> > +		old = atomic_read(&rdtp->dynticks);
> > +		if (old & RCU_DYNTICK_CTRL_CTR)
> > +			return false;
> > +		new = old | RCU_DYNTICK_CTRL_MASK;
> > +	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> > +	return true;
>
> Is that what we call atomic_fetch_or() ?
I don't think so. The above code takes an early exit if the next bit
up is set, which atomic_fetch_or() does not. If the CPU is not in
an extended quiescent state (old & RCU_DYNTICK_CTRL_CTR), then this
code returns false to indicate that TLB shootdown cannot wait.
So it is more like a very specific form of atomic_fetch_or_unless().
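Were such a primitive to exist, a purely hypothetical sketch might look
like this:

	/* Hypothetical: set @bits in @v unless @unless is already set. */
	static int atomic_fetch_or_unless(atomic_t *v, int bits, int unless)
	{
		int old;
		int new;

		do {
			old = atomic_read(v);
			if (old & unless)
				return old; /* Early exit, @v unchanged. */
			new = old | bits;
		} while (atomic_cmpxchg(v, old, new) != old);
		return old;
	}

with rcu_eqs_special_set() then just checking RCU_DYNTICK_CTRL_CTR in
the returned value.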
Thanx, Paul
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Peter Zijlstra @ 2017-02-13 17:57 UTC
To: Paul E. McKenney
Cc: linux-kernel, linux-mm, luto, tglx, fweisbec, cmetcalf, mingo
On Mon, Feb 13, 2017 at 09:01:04AM -0800, Paul E. McKenney wrote:
> > I think I've asked this before, but why does this live in the guts of
> > RCU?
> >
> > Should we lift this state tracking stuff out and make RCU and
> > NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
>
> The dyntick-idle stuff is pretty specific to RCU. And what precisely
> would be helped by moving it?
Maybe untangle the inter-dependencies somewhat. It just seems a wee bit
odd to have arch TLB invalidate depend on RCU implementation details
like this.
> But that was an excellent question, as it reminded me of RCU's
> dyntick-idle's NMI handling, and I never did ask Andy if it was OK for
> rcu_eqs_special_exit() to be invoked when exiting NMI handler, which would
> currently happen. It would be easy for me to pass in a flag indicating
> whether or not the call is in NMI context, if that is needed.
>
> It is of course not possible to detect this at rcu_eqs_special_set()
> time, because rcu_eqs_special_set() has no way of knowing that the next
> event that pulls the remote CPU out of idle will be an NMI.
>
> > In any case, small nit below:
> >
> >
> > > +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> > > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > > +		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > > +	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > > +		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> > > +		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > > +		/* Prefer duplicate flushes to losing a flush. */
> > > +		rcu_eqs_special_exit();
> > > +	}
> >
> > we have atomic_andnot() for just these occasions :-)
>
> I suppose that that could generate more efficient code on some
> architectures. I have changed this.
Right, saves 1 instruction on a number of archs. Not the end of the
world of course, but since we have the thing might as well use it.
> > > +/*
> > > + * Set the special (bottom) bit of the specified CPU so that it
> > > + * will take special action (such as flushing its TLB) on the
> > > + * next exit from an extended quiescent state. Returns true if
> > > + * the bit was successfully set, or false if the CPU was not in
> > > + * an extended quiescent state.
> > > + */
> > > +bool rcu_eqs_special_set(int cpu)
> > > +{
> > > +	int old;
> > > +	int new;
> > > +	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> > > +
> > > +	do {
> > > +		old = atomic_read(&rdtp->dynticks);
> > > +		if (old & RCU_DYNTICK_CTRL_CTR)
> > > +			return false;
> > > +		new = old | RCU_DYNTICK_CTRL_MASK;
> > > +	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> > > +	return true;
> >
> > Is that what we call atomic_fetch_or() ?
>
> I don't think so. The above code takes an early exit if the next bit
> up is set, which atomic_fetch_or() does not. If the CPU is not in
> an extended quiescent state (old & RCU_DYNTICK_CTRL_CTR), then this
> code returns false to indicate that TLB shootdown cannot wait.
Oh duh yes, reading be hard.
> So it is more like a very specific form of atomic_fetch_or_unless().
Right, I actually have a similar construct in set_nr_if_polling().
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Paul E. McKenney @ 2017-02-13 18:19 UTC
To: Peter Zijlstra
Cc: linux-kernel, linux-mm, luto, tglx, fweisbec, cmetcalf, mingo
On Mon, Feb 13, 2017 at 06:57:50PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 13, 2017 at 09:01:04AM -0800, Paul E. McKenney wrote:
> > > I think I've asked this before, but why does this live in the guts of
> > > RCU?
> > >
> > > Should we lift this state tracking stuff out and make RCU and
> > > NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
> >
> > The dyntick-idle stuff is pretty specific to RCU. And what precisely
> > would be helped by moving it?
>
> Maybe untangle the inter-dependencies somewhat. It just seems a wee bit
> odd to have arch TLB invalidate depend on RCU implementation details
> like this.
I don't know about that. After all, my lazy TLB invalidation work in
DYNIX/ptx was a key stepping-stone to my first RCU implementation. ;-)
More seriously, I don't believe moving it out would make it less odd.
> > But that was an excellent question, as it reminded me of RCU's
> > dyntick-idle's NMI handling, and I never did ask Andy if it was OK for
> > rcu_eqs_special_exit() to be invoked when exiting NMI handler, which would
> > currently happen. It would be easy for me to pass in a flag indicating
> > whether or not the call is in NMI context, if that is needed.
> >
> > It is of course not possible to detect this at rcu_eqs_special_set()
> > time, because rcu_eqs_special_set() has no way of knowing that the next
> > event that pulls the remote CPU out of idle will be an NMI.
> >
> > > In any case, small nit below:
> > >
> > >
> > > > +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> > > > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > > > +		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > > > +	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > > > +		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> > > > +		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > > > +		/* Prefer duplicate flushes to losing a flush. */
> > > > +		rcu_eqs_special_exit();
> > > > +	}
> > >
> > > we have atomic_andnot() for just these occasions :-)
> >
> > I suppose that that could generate more efficient code on some
> > architectures. I have changed this.
>
> Right, saves 1 instruction on a number of archs. Not the end of the
> world of course, but since we have the thing might as well use it.
Understood -- could be worth the extra two characters of source code.
I have made this change locally, and will push it to -rcu.
> > > > +/*
> > > > + * Set the special (bottom) bit of the specified CPU so that it
> > > > + * will take special action (such as flushing its TLB) on the
> > > > + * next exit from an extended quiescent state. Returns true if
> > > > + * the bit was successfully set, or false if the CPU was not in
> > > > + * an extended quiescent state.
> > > > + */
> > > > +bool rcu_eqs_special_set(int cpu)
> > > > +{
> > > > +	int old;
> > > > +	int new;
> > > > +	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> > > > +
> > > > +	do {
> > > > +		old = atomic_read(&rdtp->dynticks);
> > > > +		if (old & RCU_DYNTICK_CTRL_CTR)
> > > > +			return false;
> > > > +		new = old | RCU_DYNTICK_CTRL_MASK;
> > > > +	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> > > > +	return true;
> > >
> > > Is that what we call atomic_fetch_or() ?
> >
> > I don't think so. The above code takes an early exit if the next bit
> > up is set, which atomic_fetch_or() does not. If the CPU is not in
> > an extended quiescent state (old & RCU_DYNTICK_CTRL_CTR), then this
> > code returns false to indicate that TLB shootdown cannot wait.
>
> Oh duh yes, reading be hard.
I know that feeling!
> > So it is more like a very specific form of atomic_fetch_or_unless().
>
> Right, I actually have a similar construct in set_nr_if_polling().
I was going to suggest combining them, but set_nr_if_polling() needs two
different exit checks, and with two different return values. Not sure
it is worth it, but of course if someone does come up with an appropriate
primitive, I can always switch to it.
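For reference, set_nr_if_polling() is shaped roughly like the following
(paraphrased from memory, not quoted from kernel/sched/core.c):

	static bool set_nr_if_polling(struct task_struct *p)
	{
		struct thread_info *ti = task_thread_info(p);
		typeof(ti->flags) old, val = READ_ONCE(ti->flags);

		for (;;) {
			if (!(val & _TIF_POLLING_NRFLAG))
				return false; /* Not polling: caller must IPI. */
			if (val & _TIF_NEED_RESCHED)
				return true; /* Already set: nothing to do. */
			old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
			if (old == val)
				break;
			val = old;
		}
		return true;
	}

Note the two early exits with different return values, which is what
keeps it from collapsing into a single reusable primitive.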
Thanx, Paul
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Andy Lutomirski @ 2017-02-13 19:08 UTC
To: Peter Zijlstra
Cc: Paul E. McKenney, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, Andrew Lutomirski, Thomas Gleixner,
Frederic Weisbecker, Chris Metcalf, Ingo Molnar
On Mon, Feb 13, 2017 at 9:57 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Feb 13, 2017 at 09:01:04AM -0800, Paul E. McKenney wrote:
>> > I think I've asked this before, but why does this live in the guts of
>> > RCU?
>> >
>> > Should we lift this state tracking stuff out and make RCU and
>> > NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
>>
>> The dyntick-idle stuff is pretty specific to RCU. And what precisely
>> would be helped by moving it?
>
> Maybe untangle the inter-dependencies somewhat. It just seems a wee bit
> odd to have arch TLB invalidate depend on RCU implementation details
> like this.
This came out of a courtyard discussion at KS/LPC. The idea is that
this optimization requires an atomic op that could be shared with RCU
and that we probably care a lot more about this optimization on
kernels with context tracking enabled, so putting it in RCU has nice
performance properties. Other than that, it doesn't make a huge
amount of sense.
Amusingly, Darwin appears to do something similar without an atomic
op, and I have no idea why that's safe.
--Andy
* Re: [PATCH RFC v2 tip/core/rcu] Maintain special bits at bottom of ->dynticks counter
From: Paul E. McKenney @ 2017-02-13 19:52 UTC
To: Andy Lutomirski
Cc: Peter Zijlstra, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Andrew Lutomirski, Thomas Gleixner, Frederic Weisbecker,
Chris Metcalf, Ingo Molnar
On Mon, Feb 13, 2017 at 11:08:55AM -0800, Andy Lutomirski wrote:
> On Mon, Feb 13, 2017 at 9:57 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, Feb 13, 2017 at 09:01:04AM -0800, Paul E. McKenney wrote:
> >> > I think I've asked this before, but why does this live in the guts of
> >> > RCU?
> >> >
> >> > Should we lift this state tracking stuff out and make RCU and
> >> > NOHZ(_FULL) users of it, or doesn't that make sense (reason)?
> >>
> >> The dyntick-idle stuff is pretty specific to RCU. And what precisely
> >> would be helped by moving it?
> >
> > Maybe untangle the inter-dependencies somewhat. It just seems a wee bit
> > odd to have arch TLB invalidate depend on RCU implementation details
> > like this.
>
> This came out of a courtyard discussion at KS/LPC. The idea is that
> this optimzation requires an atomic op that could be shared with RCU
> and that we probably care a lot more about this optimization on
> kernels with context tracking enabled, so putting it in RCU has nice
> performance properties. Other than that, it doesn't make a huge
> amount of sense.
>
> Amusingly, Darwin appears to do something similar without an atomic
> op, and I have no idea why that's safe.
Given that they run on ARM, I have no idea either. Maybe they don't
need to be quite as bulletproof on idle-duration detection? Rumor has it
that their variant of RCU uses program-counter ranges, so they wouldn't
have the RCU tie-in -- just checks of program-counter ranges and
interesting dependencies on the compiler.
Thanx, Paul