* [PATCH RFC nohz_full 2/8] nohz_full: Add rcu_dyntick data for scalable detection of all-idle state
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 3/8] nohz_full: Add per-CPU idle-state tracking Paul E. McKenney
` (5 subsequent siblings)
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
This commit adds fields to the rcu_dynticks structure that are used to
detect idle CPUs. These new fields differ from the existing ones in
that the existing ones consider a CPU executing in user mode to be idle,
where the new ones consider CPUs executing in user mode to be busy.
The handling of these new fields is otherwise quite similar to that for
the existing fields. This commit also adds the initialization required
for these fields.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree.c | 5 +++++
kernel/rcutree.h | 11 +++++++++++
kernel/rcutree_plugin.h | 19 +++++++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 410b5ef..212ef97 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -209,6 +209,10 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+ .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+ .dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
};
static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
@@ -2902,6 +2906,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->blimit = blimit;
init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+ rcu_sysidle_init_percpu_data(rdp->dynticks);
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4a39d36..29e9bc3 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -88,6 +88,16 @@ struct rcu_dynticks {
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+ long long dynticks_idle_nesting;
+ /* irq/process nesting level from idle. */
+ atomic_t dynticks_idle; /* Even value for idle, else odd. */
+ /* "Idle" excludes userspace execution. */
+ unsigned long dynticks_idle_jiffies;
+ /* End of last non-NMI non-idle period. */
+ unsigned long dynticks_nmi_jiffies;
+ /* End of last NMI non-idle period. */
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
#ifdef CONFIG_RCU_FAST_NO_HZ
bool all_lazy; /* Are all CPU's CBs lazy? */
unsigned long nonlazy_posted;
@@ -545,6 +555,7 @@ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void rcu_kick_nohz_cpu(int cpu);
static bool init_nocb_callback_list(struct rcu_data *rdp);
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 63098a5..ca93333 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2375,3 +2375,22 @@ static void rcu_kick_nohz_cpu(int cpu)
smp_send_reschedule(cpu);
#endif /* #ifdef CONFIG_NO_HZ_FULL */
}
+
+
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+
+/*
+ * Initialize dynticks sysidle state for CPUs coming online.
+ */
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+ rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
+}
+
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 3/8] nohz_full: Add per-CPU idle-state tracking
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 2/8] nohz_full: Add rcu_dyntick data " Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 4/8] nohz_full: Add per-CPU idle-state tracking for NMIs Paul E. McKenney
` (4 subsequent siblings)
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
This commit adds the code that updates the rcu_dynticks structure's new
fields to track the per-CPU idle state based on interrupts and transitions
into and out of the idle loop (NMIs will be handled separately). This
code is similar to the code that maintains RCU's idea of per-CPU idleness,
but differs in that RCU treats CPUs running in user mode as idle, where
this new code does not.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree.c | 4 +++
kernel/rcutree.h | 2 ++
kernel/rcutree_plugin.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 85 insertions(+)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 212ef97..c814ce1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -416,6 +416,7 @@ void rcu_idle_enter(void)
local_irq_save(flags);
rcu_eqs_enter(false);
+ rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -466,6 +467,7 @@ void rcu_irq_exit(void)
trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
else
rcu_eqs_enter_common(rdtp, oldval, true);
+ rcu_sysidle_enter(rdtp, 1);
local_irq_restore(flags);
}
@@ -534,6 +536,7 @@ void rcu_idle_exit(void)
local_irq_save(flags);
rcu_eqs_exit(false);
+ rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -585,6 +588,7 @@ void rcu_irq_enter(void)
trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
else
rcu_eqs_exit_common(rdtp, oldval, true);
+ rcu_sysidle_exit(rdtp, 1);
local_irq_restore(flags);
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 29e9bc3..a56d1f1 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -555,6 +555,8 @@ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void rcu_kick_nohz_cpu(int cpu);
static bool init_nocb_callback_list(struct rcu_data *rdp);
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ca93333..b704979 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2380,6 +2380,77 @@ static void rcu_kick_nohz_cpu(int cpu)
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
/*
+ * Invoked to note exit from irq or task transition to idle. Note that
+ * usermode execution does -not- count as idle here! The caller must
+ * have disabled interrupts.
+ */
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+ unsigned long j;
+
+ /* Adjust nesting, check for fully idle. */
+ if (irq) {
+ rdtp->dynticks_idle_nesting--;
+ WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+ if (rdtp->dynticks_idle_nesting != 0)
+ return; /* Still not fully idle. */
+ } else {
+ if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
+ DYNTICK_TASK_NEST_VALUE) {
+ rdtp->dynticks_idle_nesting = 0;
+ } else {
+ rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
+ WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+ return; /* Still not fully idle. */
+ }
+ }
+
+ /* Record start of fully idle period. */
+ j = jiffies;
+ ACCESS_ONCE(rdtp->dynticks_nmi_jiffies) = j;
+ ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
+ smp_mb__before_atomic_inc();
+ atomic_inc(&rdtp->dynticks_idle);
+ smp_mb__after_atomic_inc();
+ WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
+}
+
+/*
+ * Invoked to note entry to irq or task transition from idle. Note that
+ * usermode execution does -not- count as idle here! The caller must
+ * have disabled interrupts.
+ */
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+ /* Adjust nesting, check for already non-idle. */
+ if (irq) {
+ rdtp->dynticks_idle_nesting++;
+ WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+ if (rdtp->dynticks_idle_nesting != 1)
+ return; /* Already non-idle. */
+ } else {
+ /*
+ * Allow for irq misnesting. Yes, it really is possible
+ * to enter an irq handler then never leave it, and maybe
+ * also vice versa. Handle both possibilities.
+ */
+ if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
+ rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
+ WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+ return; /* Already non-idle. */
+ } else {
+ rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
+ }
+ }
+
+ /* Record end of idle period. */
+ smp_mb__before_atomic_inc();
+ atomic_inc(&rdtp->dynticks_idle);
+ smp_mb__after_atomic_inc();
+ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
+}
+
+/*
* Initialize dynticks sysidle state for CPUs coming online.
*/
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
@@ -2389,6 +2460,14 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
{
}
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 4/8] nohz_full: Add per-CPU idle-state tracking for NMIs
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 2/8] nohz_full: Add rcu_dyntick data " Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 3/8] nohz_full: Add per-CPU idle-state tracking Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 5/8] nohz_full: Add full-system idle states and variables Paul E. McKenney
` (3 subsequent siblings)
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
It turns out that we can reuse RCU's ->dynticks counter to identify
CPUs that are non-idle due to NMIs from idle, in combination with the
new full-system idle ->dynticks_idle counter. The reason this works
can be seen from the following table:
->dynticks ->dynticks_idle union
NMI from idle: non-idle idle non-idle
NMI from user: non-idle non-idle non-idle
NMI from non-idle kernel: non-idle non-idle non-idle
idle: idle idle idle
user: idle non-idle non-idle
non-idle kernel: non-idle non-idle non-idle
Note that the final "union" column gets us what we need: A non-idle
indication in all cases except when the CPU really is in the idle loop.
(But what about interrupt handlers? They are treated the same as
non-idle kernel.)
Therefore, if both ->dynticks and ->dynticks_idle say that the corresponding
CPU is idle (in other words, both have even values), then the CPU really
is idle.
The only additional thing that this commit needs to supply is the time
that the last NMI either started or ended for the corresponding CPU.
This is used to determine whether or not this CPU has been idle long
enough to justify updating the global full-system idle state.
Final caveat: This approach assumes that NMI handlers do not access
system time, an assumption that the existing dyntick-idle code also
makes. To see this, suppose that the system has been idle for an
extended period of time, so that the clock values are obsolete, and
that an NMI arrives. The NMI handler has no safe way to update the
clock values, and thus must do without.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree.c | 7 +++++--
kernel/rcutree.h | 1 +
kernel/rcutree_plugin.h | 9 +++++++++
3 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index c814ce1..02b879a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -607,6 +607,7 @@ void rcu_nmi_enter(void)
(atomic_read(&rdtp->dynticks) & 0x1))
return;
rdtp->dynticks_nmi_nesting++;
+ rcu_sysidle_nmi_jiffies(rdtp);
smp_mb__before_atomic_inc(); /* Force delay from prior write. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -625,8 +626,10 @@ void rcu_nmi_exit(void)
{
struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->dynticks_nmi_nesting == 0 ||
- --rdtp->dynticks_nmi_nesting != 0)
+ if (rdtp->dynticks_nmi_nesting == 0)
+ return;
+ rcu_sysidle_nmi_jiffies(rdtp);
+ if (--rdtp->dynticks_nmi_nesting != 0)
return;
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
smp_mb__before_atomic_inc(); /* See above. */
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index a56d1f1..11d7144 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -557,6 +557,7 @@ static void rcu_kick_nohz_cpu(int cpu);
static bool init_nocb_callback_list(struct rcu_data *rdp);
static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
+static void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index b704979..a00d5c9 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2450,6 +2450,11 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
}
+static inline void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp)
+{
+ rdtp->dynticks_nmi_jiffies = jiffies;
+}
+
/*
* Initialize dynticks sysidle state for CPUs coming online.
*/
@@ -2468,6 +2473,10 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
{
}
+static inline void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp)
+{
+}
+
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
{
}
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 5/8] nohz_full: Add full-system idle states and variables
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
` (2 preceding siblings ...)
2013-06-25 21:37 ` [PATCH RFC nohz_full 4/8] nohz_full: Add per-CPU idle-state tracking for NMIs Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 6/8] nohz_full: Add full-system-idle arguments to API Paul E. McKenney
` (2 subsequent siblings)
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
This commit adds control variables and states for full-system idle.
The system will progress through the states in numerical order when
the system is fully idle (other than the timekeeping CPU), and reset
down to the initial state if any non-timekeeping CPU goes non-idle.
The current state is kept in full_sysidle_state.
A RCU_SYSIDLE_SMALL macro is defined, and systems with this number
of CPUs or fewer move through the states more aggressively. The idea
is that the resulting memory contention is less of a problem on small
systems. Architectures can adjust this value (which defaults to 8)
using CONFIG_ARCH_RCU_SYSIDLE_SMALL.
One flavor of RCU will be in charge of driving the state machine,
defined by rcu_sysidle_state. This should be the busiest flavor of RCU.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree_plugin.h | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a00d5c9..349f426 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2380,6 +2380,33 @@ static void rcu_kick_nohz_cpu(int cpu)
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
/*
+ * Handle small systems specially, accelerating their transition into
+ * full idle state. Allow arches to override this code's idea of
+ * what constitutes a "small" system.
+ */
+#ifdef CONFIG_ARCH_RCU_SYSIDLE_SMALL
+#define RCU_SYSIDLE_SMALL CONFIG_ARCH_RCU_SYSIDLE_SMALL
+#else /* #ifdef CONFIG_ARCH_RCU_SYSIDLE_SMALL */
+#define RCU_SYSIDLE_SMALL 8
+#endif
+
+/*
+ * Define RCU flavor that holds sysidle state. This needs to be the
+ * most active flavor of RCU.
+ */
+#ifdef CONFIG_PREEMPT_RCU
+static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_preempt_state;
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_sched_state;
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+static int __maybe_unused full_sysidle_state; /* Current system-idle state. */
+#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
+#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
+#define RCU_SYSIDLE_FULL 2 /* All CPUs idle, ready for sysidle. */
+#define RCU_SYSIDLE_FULL_NOTED 3 /* Actually entered sysidle state. */
+
+/*
* Invoked to note exit from irq or task transition to idle. Note that
* usermode execution does -not- count as idle here! The caller must
* have disabled interrupts.
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 6/8] nohz_full: Add full-system-idle arguments to API
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
` (3 preceding siblings ...)
2013-06-25 21:37 ` [PATCH RFC nohz_full 5/8] nohz_full: Add full-system idle states and variables Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 7/8] nohz_full: Add full-system-idle state machine Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 8/8] nohz_full: Force RCU's grace-period kthreads onto timekeeping CPU Paul E. McKenney
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
This commit adds isidle and maxj (jiffies) arguments to force_qs_rnp(),
dyntick_save_progress_counter(), and rcu_implicit_dynticks_qs() to enable
RCU's force-quiescent-state process to check for full-system idle.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 02b879a..8f5d9f2 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -231,7 +231,9 @@ module_param(jiffies_till_next_fqs, ulong, 0644);
static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
+static void force_qs_rnp(struct rcu_state *rsp,
+ int (*f)(struct rcu_data *, bool *, unsigned long *),
+ bool *isidle, unsigned long *maxj);
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(int cpu);
@@ -715,7 +717,8 @@ static int rcu_is_cpu_rrupt_from_idle(void)
* credit them with an implicit quiescent state. Return 1 if this CPU
* is in dynticks idle mode, which is an extended quiescent state.
*/
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
+static int dyntick_save_progress_counter(struct rcu_data *rdp,
+ bool *isidle, unsigned long *maxj)
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
return (rdp->dynticks_snap & 0x1) == 0;
@@ -727,7 +730,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
* idle state since the last call to dyntick_save_progress_counter()
* for this same CPU, or by virtue of having been offline.
*/
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
+ bool *isidle, unsigned long *maxj)
{
unsigned int curr;
unsigned int snap;
@@ -1348,16 +1352,19 @@ static int rcu_gp_init(struct rcu_state *rsp)
int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
{
int fqs_state = fqs_state_in;
+ bool isidle = 0;
+ unsigned long maxj;
struct rcu_node *rnp = rcu_get_root(rsp);
rsp->n_force_qs++;
if (fqs_state == RCU_SAVE_DYNTICK) {
/* Collect dyntick-idle snapshots. */
- force_qs_rnp(rsp, dyntick_save_progress_counter);
+ force_qs_rnp(rsp, dyntick_save_progress_counter,
+ &isidle, &maxj);
fqs_state = RCU_FORCE_QS;
} else {
/* Handle dyntick-idle and offline CPUs. */
- force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
+ force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
}
/* Clear flag to prevent immediate re-entry. */
if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -2058,7 +2065,9 @@ void rcu_check_callbacks(int cpu, int user)
*
* The caller must have suppressed start of new grace periods.
*/
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp,
+ int (*f)(struct rcu_data *, bool *, unsigned long *),
+ bool *isidle, unsigned long *maxj)
{
unsigned long bit;
int cpu;
@@ -2082,7 +2091,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
bit = 1;
for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
if ((rnp->qsmask & bit) != 0 &&
- f(per_cpu_ptr(rsp->rda, cpu)))
+ f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
mask |= bit;
}
if (mask != 0) {
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 7/8] nohz_full: Add full-system-idle state machine
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
` (4 preceding siblings ...)
2013-06-25 21:37 ` [PATCH RFC nohz_full 6/8] nohz_full: Add full-system-idle arguments to API Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
2013-06-25 21:37 ` [PATCH RFC nohz_full 8/8] nohz_full: Force RCU's grace-period kthreads onto timekeeping CPU Paul E. McKenney
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
This commit adds the state machine that takes the per-CPU idle data
as input and produces a full-system-idle indication as output. This
state machine is driven out of RCU's quiescent-state-forcing
mechanism, which invokes rcu_sysidle_check_cpu() to collect per-CPU
idle state and then rcu_sysidle_report() to drive the state machine.
The full-system-idle state is sampled using rcu_sys_is_idle(), which
also drives the state machine if RCU is idle (and does so by forcing
RCU to become non-idle). This function returns true if all but the
timekeeping CPU (tick_do_timer_cpu) are idle and have been idle long
enough to avoid memory contention on the full_sysidle_state state
variable. The rcu_sysidle_force_exit() function may be called externally
to reset the state machine back into non-idle state.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/rcupdate.h | 18 ++++
kernel/rcutree.c | 16 ++-
kernel/rcutree.h | 5 +
kernel/rcutree_plugin.h | 273 ++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 305 insertions(+), 7 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 48f1ef9..1aa8d8c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1011,4 +1011,22 @@ static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+/* Only for use by adaptive-ticks code. */
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+extern bool rcu_sys_is_idle(void);
+extern void rcu_sysidle_force_exit(void);
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+static inline bool rcu_sys_is_idle(void)
+{
+ return false;
+}
+
+static inline void rcu_sysidle_force_exit(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+
#endif /* __LINUX_RCUPDATE_H */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 8f5d9f2..10e74d6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -721,6 +721,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
bool *isidle, unsigned long *maxj)
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+ rcu_sysidle_check_cpu(rdp, isidle, maxj);
return (rdp->dynticks_snap & 0x1) == 0;
}
@@ -1359,11 +1360,17 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
rsp->n_force_qs++;
if (fqs_state == RCU_SAVE_DYNTICK) {
/* Collect dyntick-idle snapshots. */
+ if (is_sysidle_rcu_state(rsp)) {
+ isidle = 1;
+ maxj = jiffies - ULONG_MAX / 4;
+ }
force_qs_rnp(rsp, dyntick_save_progress_counter,
&isidle, &maxj);
+ rcu_sysidle_report(rsp, isidle, maxj);
fqs_state = RCU_FORCE_QS;
} else {
/* Handle dyntick-idle and offline CPUs. */
+ isidle = 0;
force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
}
/* Clear flag to prevent immediate re-entry. */
@@ -2090,9 +2097,12 @@ static void force_qs_rnp(struct rcu_state *rsp,
cpu = rnp->grplo;
bit = 1;
for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
- if ((rnp->qsmask & bit) != 0 &&
- f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
- mask |= bit;
+ if ((rnp->qsmask & bit) != 0) {
+ if ((rnp->qsmaskinit & bit) != 0)
+ *isidle = 0;
+ if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+ mask |= bit;
+ }
}
if (mask != 0) {
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 11d7144..8e869fc 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -558,6 +558,11 @@ static bool init_nocb_callback_list(struct rcu_data *rdp);
static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
static void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp);
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+ unsigned long *maxj);
+static bool is_sysidle_rcu_state(struct rcu_state *rsp);
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+ unsigned long maxj);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 349f426..c3ce268 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -28,7 +28,7 @@
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
-#include <linux/tick.h>
+#include "time/tick-internal.h"
#define RCU_KTHREAD_PRIO 1
@@ -2395,12 +2395,12 @@ static void rcu_kick_nohz_cpu(int cpu)
* most active flavor of RCU.
*/
#ifdef CONFIG_PREEMPT_RCU
-static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_preempt_state;
+static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
#else /* #ifdef CONFIG_PREEMPT_RCU */
-static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_sched_state;
+static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-static int __maybe_unused full_sysidle_state; /* Current system-idle state. */
+static int full_sysidle_state; /* Current system-idle state. */
#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
#define RCU_SYSIDLE_FULL 2 /* All CPUs idle, ready for sysidle. */
@@ -2443,6 +2443,38 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
}
/*
+ * Unconditionally force exit from full system-idle state. This is
+ * invoked when a normal CPU exits idle, but must be called separately
+ * for the timekeeping CPU (tick_do_timer_cpu). The reason for this
+ * is that the timekeeping CPU is permitted to take scheduling-clock
+ * interrupts while the system is in system-idle state, and of course
+ * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
+ * interrupt from any other type of interrupt.
+ */
+void rcu_sysidle_force_exit(void)
+{
+ int oldstate = ACCESS_ONCE(full_sysidle_state);
+ int newoldstate;
+
+ /*
+ * Each pass through the following loop attempts to exit full
+ * system-idle state. If contention proves to be a problem,
+ * a trylock-based contention tree could be used here.
+ */
+ while (oldstate > RCU_SYSIDLE_SHORT) {
+ newoldstate = cmpxchg(&full_sysidle_state,
+ oldstate, RCU_SYSIDLE_NOT);
+ if (oldstate == newoldstate &&
+ oldstate == RCU_SYSIDLE_FULL_NOTED) {
+ rcu_kick_nohz_cpu(tick_do_timer_cpu);
+ return; /* We cleared it, done! */
+ }
+ oldstate = newoldstate;
+ }
+ smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
+}
+
+/*
* Invoked to note entry to irq or task transition from idle. Note that
* usermode execution does -not- count as idle here! The caller must
* have disabled interrupts.
@@ -2475,14 +2507,232 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
atomic_inc(&rdtp->dynticks_idle);
smp_mb__after_atomic_inc();
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
+
+ /*
+ * If we are the timekeeping CPU, we are permitted to be non-idle
+ * during a system-idle state. This must be the case, because
+ * the timekeeping CPU has to take scheduling-clock interrupts
+ * during the time that the system is transitioning to full
+ * system-idle state. This means that the timekeeping CPU must
+ * invoke rcu_sysidle_force_exit() directly if it does anything
+ * more than take a scheduling-clock interrupt.
+ */
+ if (smp_processor_id() == tick_do_timer_cpu)
+ return;
+
+ /* Update system-idle state: We are clearly no longer fully idle! */
+ rcu_sysidle_force_exit();
}
+/*
+ * Record the jiffies that this NMI occurred at. The current value of
+ * jiffies is stored into ->dynticks_nmi_jiffies, which
+ * rcu_sysidle_check_cpu() later compares with ->dynticks_idle_jiffies.
+ */
static inline void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp)
{
rdtp->dynticks_nmi_jiffies = jiffies;
}
/*
+ * Check to see if the CPU corresponding to rdp is idle. Note that
+ * usermode execution does not count as idle. The caller must have
+ * disabled interrupts.
+ *
+ * rdp: rcu_data structure of the CPU to be checked; its ->dynticks
+ * pointer supplies the counters and timestamps examined here.
+ * isidle: in/out flag. If it is already false on entry, nothing is
+ * done. It is cleared if this CPU is found to be non-idle.
+ * maxj: in/out timestamp. It is advanced to the later of this CPU's
+ * idle and NMI timestamps if that value is later than *maxj.
+ *
+ * Side effect: if the NMI timestamp is later than the idle timestamp,
+ * ->dynticks_idle_jiffies is overwritten with the NMI timestamp.
+ */
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+ unsigned long *maxj)
+{
+ int cur;
+ int curnmi;
+ unsigned long j;
+ unsigned long jnmi;
+ struct rcu_dynticks *rdtp = rdp->dynticks;
+
+ /*
+ * If some other CPU has already reported non-idle, if this is
+ * not the flavor of RCU that tracks sysidle state, or if this
+ * is an offline CPU or the timekeeping CPU, nothing to do.
+ */
+ if (!*isidle || rdp->rsp != rcu_sysidle_state ||
+ cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
+ return;
+ /* WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); */
+
+ /*
+ * Pick up current idle and NMI-nesting counters, check. We check
+ * for NMIs using RCU's main ->dynticks counter. This works because
+ * any time ->dynticks has its low bit set, ->dynticks_idle will
+ * too -- unless the only reason that ->dynticks's low bit is set
+ * is due to an NMI from idle. Which is exactly the case we need
+ * to account for.
+ */
+ cur = atomic_read(&rdtp->dynticks_idle);
+ curnmi = atomic_read(&rdtp->dynticks);
+ if ((cur & 0x1) || (curnmi & 0x1)) {
+ *isidle = 0; /* We are not idle! */
+ return;
+ }
+ smp_mb(); /* Read counters before timestamps. */
+
+ /* Pick up timestamps, keeping whichever of the two is later. */
+ j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+ jnmi = ACCESS_ONCE(rdtp->dynticks_nmi_jiffies);
+ if (ULONG_CMP_LT(j, jnmi)) {
+ j = jnmi;
+ ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = jnmi;
+ }
+
+ /* If this CPU entered idle more recently, update maxj timestamp. */
+ if (ULONG_CMP_LT(*maxj, j))
+ *maxj = j;
+}
+
+/*
+ * Is this the flavor of RCU that is handling full-system idle?
+ * Returns true only if rsp is equal to rcu_sysidle_state.
+ */
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+ return rsp == rcu_sysidle_state;
+}
+
+/*
+ * Return a delay in jiffies based on the number of CPUs, rcu_node
+ * leaf fanout, and jiffies tick rate. The idea is to allow larger
+ * systems more time to transition to full-idle state in order to
+ * avoid the cache thrashing that would otherwise occur on the state
+ * variable.
+ * Really small systems (less than a couple of tens of CPUs) should
+ * instead use a single global atomically incremented counter, and later
+ * versions of this will automatically reconfigure themselves accordingly.
+ *
+ * The delay is zero when nr_cpu_ids is at most RCU_SYSIDLE_SMALL.
+ * Otherwise it is nr_cpu_ids * HZ divided by rcu_fanout_leaf * 1000,
+ * rounded up.
+ */
+static unsigned long rcu_sysidle_delay(void)
+{
+ if (nr_cpu_ids <= RCU_SYSIDLE_SMALL)
+ return 0;
+ return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
+}
+
+/*
+ * Advance the full-system-idle state. This is invoked when all of
+ * the non-timekeeping CPUs are idle.
+ *
+ * j: timestamp in jiffies against which the idle duration is measured;
+ * rcu_sysidle_report() passes in its maxj argument.
+ *
+ * RCU_SYSIDLE_NOT is advanced to RCU_SYSIDLE_SHORT unconditionally.
+ * RCU_SYSIDLE_SHORT is advanced to RCU_SYSIDLE_FULL only once jiffies
+ * has reached j + rcu_sysidle_delay(). Any other state is left alone.
+ */
+static void rcu_sysidle(unsigned long j)
+{
+ /* Check the current state. */
+ switch (ACCESS_ONCE(full_sysidle_state)) {
+ case RCU_SYSIDLE_NOT:
+
+ /* First time all are idle, so note a short idle period. */
+ ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+ break;
+
+ case RCU_SYSIDLE_SHORT:
+
+ /*
+ * Idle for a bit, time to advance to next state?
+ * cmpxchg failure means race with non-idle, let them win.
+ */
+ if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+ (void)cmpxchg(&full_sysidle_state,
+ RCU_SYSIDLE_SHORT, RCU_SYSIDLE_FULL);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Found a non-idle non-timekeeping CPU, so kick the system-idle state
+ * back to the beginning. This executes a full memory barrier and then
+ * unconditionally stores RCU_SYSIDLE_NOT to full_sysidle_state.
+ *
+ * NOTE(review): the smp_mb() is presumably there to order the caller's
+ * observation of a non-idle CPU before the store -- confirm.
+ */
+static void rcu_sysidle_cancel(void)
+{
+ smp_mb();
+ ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+}
+
+/*
+ * Update the sysidle state based on the results of a force-quiescent-state
+ * scan of the CPUs' dyntick-idle state.
+ *
+ * rsp: RCU flavor whose scan is being reported; the report is ignored
+ * unless this is rcu_sysidle_state.
+ * isidle: nonzero if no non-idle CPU was found, in which case the state
+ * is advanced via rcu_sysidle(); otherwise the state is reset via
+ * rcu_sysidle_cancel().
+ * maxj: timestamp handed to rcu_sysidle() when isidle is nonzero.
+ */
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+ unsigned long maxj)
+{
+ if (rsp != rcu_sysidle_state)
+ return; /* Wrong flavor, ignore. */
+ if (isidle)
+ rcu_sysidle(maxj); /* More idle! */
+ else
+ rcu_sysidle_cancel(); /* Idle is over. */
+}
+
+/*
+ * Callback and function for forcing an RCU grace period.
+ *
+ * rh: rcu_head handed to call_rcu() by rcu_sys_is_idle().
+ * inuse: set to 1 by rcu_sys_is_idle() before it posts the callback,
+ * and cleared by rcu_sysidle_cb() when the callback is invoked.
+ * rcu_sys_is_idle() does not post rh while inuse is nonzero.
+ */
+struct rcu_sysidle_head {
+ struct rcu_head rh;
+ int inuse;
+};
+
+static void rcu_sysidle_cb(struct rcu_head *rhp)
+{
+ struct rcu_sysidle_head *rshp;
+
+ smp_mb(); /* Grace period precedes clearing of ->inuse. */
+ rshp = container_of(rhp, struct rcu_sysidle_head, rh);
+ ACCESS_ONCE(rshp->inuse) = 0; /* Callback may now be reposted. */
+}
+
+/*
+ * Check to see if the system is fully idle, other than the timekeeping CPU.
+ * The caller must have disabled interrupts.
+ *
+ * Returns true if this call moved full_sysidle_state from RCU_SYSIDLE_FULL
+ * to RCU_SYSIDLE_FULL_NOTED, or if the sampled state was already
+ * RCU_SYSIDLE_FULL_NOTED. Returns false otherwise, including when the
+ * cmpxchg() from RCU_SYSIDLE_FULL fails.
+ *
+ * When nr_cpu_ids is at most RCU_SYSIDLE_SMALL and the state has not yet
+ * reached RCU_SYSIDLE_FULL, this function scans the CPUs itself and
+ * reports the result. On larger systems that are not yet fully idle, it
+ * instead posts an RCU callback, provided that no grace period is in
+ * progress and the callback is not already posted.
+ *
+ * Warns once if invoked on a CPU other than tick_do_timer_cpu.
+ */
+bool rcu_sys_is_idle(void)
+{
+ static struct rcu_sysidle_head rsh;
+ int rss = ACCESS_ONCE(full_sysidle_state);
+
+ WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+
+ /* Handle small-system case by doing a full scan of CPUs. */
+ if (nr_cpu_ids <= RCU_SYSIDLE_SMALL && rss < RCU_SYSIDLE_FULL) {
+ int cpu;
+ bool isidle = true;
+ unsigned long maxj = jiffies - ULONG_MAX / 4; /* Far in the past. */
+ struct rcu_data *rdp;
+
+ /* Scan all the CPUs looking for nonidle CPUs. */
+ for_each_possible_cpu(cpu) {
+ rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
+ rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
+ if (!isidle)
+ break;
+ }
+ rcu_sysidle_report(rcu_sysidle_state, isidle, maxj);
+ rss = ACCESS_ONCE(full_sysidle_state); /* Resample after report. */
+ }
+
+ /* If this is the first observation of an idle period, record it. */
+ if (rss == RCU_SYSIDLE_FULL) {
+ rss = cmpxchg(&full_sysidle_state,
+ RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
+ return rss == RCU_SYSIDLE_FULL; /* True only if cmpxchg() won. */
+ }
+
+ smp_mb(); /* ensure rss load happens before later caller actions. */
+
+ /* If already fully idle, tell the caller (in case of races). */
+ if (rss == RCU_SYSIDLE_FULL_NOTED)
+ return true;
+
+ /*
+ * If we aren't there yet, and a grace period is not in flight,
+ * initiate a grace period. Either way, tell the caller that
+ * we are not there yet.
+ */
+ if (nr_cpu_ids > RCU_SYSIDLE_SMALL &&
+ !rcu_gp_in_progress(rcu_sysidle_state) &&
+ !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
+ call_rcu(&rsh.rh, rcu_sysidle_cb);
+ return false;
+}
+
+/*
* Initialize dynticks sysidle state for CPUs coming online.
*/
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
@@ -2504,6 +2754,21 @@ static inline void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp)
{
}
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+ unsigned long *maxj)
+{ /* Empty stub: *isidle and *maxj are left untouched. */
+}
+
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+ return false; /* Stub: no flavor is reported as handling sysidle. */
+}
+
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+ unsigned long maxj)
+{ /* Empty stub: the report is discarded. */
+}
+
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
{ /* Empty stub: rdtp is not modified. */
}
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH RFC nohz_full 8/8] nohz_full: Force RCU's grace-period kthreads onto timekeeping CPU
2013-06-25 21:37 ` [PATCH RFC nohz_full 1/8] nohz_full: Add Kconfig parameter for scalable detection of all-idle state Paul E. McKenney
` (5 preceding siblings ...)
2013-06-25 21:37 ` [PATCH RFC nohz_full 7/8] nohz_full: Add full-system-idle state machine Paul E. McKenney
@ 2013-06-25 21:37 ` Paul E. McKenney
6 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2013-06-25 21:37 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, dhowells, edumazet, darren, fweisbec, sbw,
Paul E. McKenney
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Because RCU's quiescent-state-forcing mechanism is used to drive the
full-system-idle state machine, and because this mechanism is executed
by RCU's grace-period kthreads, this commit forces these kthreads to
run on the timekeeping CPU (tick_do_timer_cpu). To do otherwise would
mean that the RCU grace-period kthreads would force the system into
non-idle state every time they drove the state machine, which would
be just a bit on the futile side.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
kernel/rcutree.c | 1 +
kernel/rcutree.h | 1 +
kernel/rcutree_plugin.h | 20 +++++++++++++++++++-
3 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 10e74d6..0fe28ed 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1289,6 +1289,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
+ rcu_bind_gp_kthread();
raw_spin_lock_irq(&rnp->lock);
rsp->gp_flags = 0; /* Clear all flags: New grace period. */
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 8e869fc..7a8cdb4 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -561,6 +561,7 @@ static void rcu_sysidle_nmi_jiffies(struct rcu_dynticks *rdtp);
static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
unsigned long *maxj);
static bool is_sysidle_rcu_state(struct rcu_state *rsp);
+static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
unsigned long maxj);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c3ce268..39a3091 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2553,7 +2553,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
if (!*isidle || rdp->rsp != rcu_sysidle_state ||
cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
return;
- /* WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); */
+ WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
/*
* Pick up current idle and NMI-nesting counters, check. We check
@@ -2593,6 +2593,20 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
}
/*
+ * Bind the grace-period kthread for the sysidle flavor of RCU to the
+ * timekeeping CPU.
+ *
+ * This samples tick_do_timer_cpu once and does nothing if the value is
+ * negative or is not below nr_cpu_ids. Otherwise, if the task is not
+ * already running on that CPU, the allowed-CPU mask of the current task
+ * is restricted to that single CPU. The return value of
+ * set_cpus_allowed_ptr() is ignored.
+ *
+ * NOTE(review): this acts on "current" and does not itself check the
+ * RCU flavor, so both appear to be the caller's responsibility -- confirm.
+ */
+static void rcu_bind_gp_kthread(void)
+{
+ int cpu = ACCESS_ONCE(tick_do_timer_cpu);
+
+ if (cpu < 0 || cpu >= nr_cpu_ids)
+ return;
+ if (raw_smp_processor_id() != cpu)
+ set_cpus_allowed_ptr(current, cpumask_of(cpu));
+}
+
+/*
* Return a delay in jiffies based on the number of CPUs, rcu_node
* leaf fanout, and jiffies tick rate. The idea is to allow larger
* systems more time to transition to full-idle state in order to
@@ -2764,6 +2778,10 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
return false;
}
+static void rcu_bind_gp_kthread(void)
+{ /* Empty stub: the calling task's CPU affinity is left unchanged. */
+}
+
static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
unsigned long maxj)
{
--
1.8.1.5
^ permalink raw reply related [flat|nested] 17+ messages in thread