* [PATCH] rcu: check dynticks idle cpu when start gp
@ 2010-03-28 3:31 Lai Jiangshan
2010-03-28 4:22 ` Paul E. McKenney
0 siblings, 1 reply; 2+ messages in thread
From: Lai Jiangshan @ 2010-03-28 3:31 UTC (permalink / raw)
To: Paul E. McKenney, Ingo Molnar, LKML
Currently, if there are any dynticks-idle CPUs, grace periods (GPs)
cannot end until we call force_quiescent_state().
This means we frequently delay our GPs.
It also means we frequently do non-fastpath work (force QS).
This patch adds a _hint_ mask to struct rcu_node, and
checks for dynticks-idle CPUs when starting a GP.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 1947c4e..a85c5c4 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -666,6 +666,62 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
__note_new_gpnum(rsp, rnp, rdp);
}
+static void rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
+ struct rcu_node *rnp, unsigned long flags);
+
+static inline void rcu_check_recent_offcpus(struct rcu_state *rsp,
+ struct rcu_node *rnp, unsigned long flags)
+ __releases(rnp->lock)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_NO_HZ) || defined(CONFIG_HOTPLUG_CPU)
+ /*
+ * if it is leaf node and there are some CPU being dynticks state
+ * or offline-ing recently, we will check and report qs
+ * for these CPU when needed.
+ * (for nonleaf node, rnp->offmask is always 0)
+ */
+ if (rnp->qsmask & rnp->offmask) {
+ int cpu;
+ unsigned long bit;
+ unsigned long mask = rnp->qsmask & rnp->offmask;
+ struct rcu_data *rdp;
+ long curr, curr_nmi;
+ int off;
+
+ cpu = rnp->grplo;
+ bit = 1;
+ for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
+ if (!(mask & bit))
+ continue;
+
+ rdp = rsp->rda[cpu];
+ curr = rdp->dynticks->dynticks;
+ curr_nmi = rdp->dynticks->dynticks_nmi;
+
+ off = ((curr & 0x1) == 0) && ((curr_nmi & 0x1) == 0);
+ off = off || cpu_is_offline(rdp->cpu);
+
+ if (!off)
+ mask &= ~bit;
+ }
+
+ rnp->offmask = mask;
+ if (mask) {
+ /*
+ * It is impossible that rcu_report_qs_rnp()
+ * calls rcu_report_qs_rsp() again. So we
+ * will not cause recursion.
+ */
+ rcu_report_qs_rnp(mask, rsp, rnp, flags);
+ return;
+ }
+ }
+#endif
+#endif
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
@@ -717,7 +773,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ rcu_check_recent_offcpus(rsp, rnp, flags);
return;
}
@@ -745,14 +801,20 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
* irqs disabled.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+ unsigned long tmp_flags;
+
+ /* irqs already disabled */
+ raw_spin_lock_irqsave(&rnp->lock, tmp_flags);
+
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+
+ rcu_check_recent_offcpus(rsp, rnp, tmp_flags);
+ /* irqs remain disabled. */
}
rnp = rcu_get_root(rsp);
@@ -1190,8 +1252,10 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
cpu = rnp->grplo;
bit = 1;
for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
- if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
+ if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) {
mask |= bit;
+ rnp->offmask |= bit;
+ }
}
if (mask != 0) {
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4a525a3..8ec96a7 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -104,6 +104,8 @@ struct rcu_node {
/* an rcu_data structure, otherwise, each */
/* bit corresponds to a child rcu_node */
/* structure. */
+ unsigned long offmask; /* Track CPUs which are dynticks state */
+ /* or offline-ing recently. */
unsigned long expmask; /* Groups that have ->blocked_tasks[] */
/* elements that need to drain to allow the */
/* current expedited grace period to */
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] rcu: check dynticks idle cpu when start gp
2010-03-28 3:31 [PATCH] rcu: check dynticks idle cpu when start gp Lai Jiangshan
@ 2010-03-28 4:22 ` Paul E. McKenney
0 siblings, 0 replies; 2+ messages in thread
From: Paul E. McKenney @ 2010-03-28 4:22 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Ingo Molnar, LKML
On Sun, Mar 28, 2010 at 11:31:44AM +0800, Lai Jiangshan wrote:
>
> Currently, if there are any dynticks-idle CPUs, grace periods (GPs)
> cannot end until we call force_quiescent_state().
> This means we frequently delay our GPs.
> It also means we frequently do non-fastpath work (force QS).
>
> This patch adds a _hint_ mask to struct rcu_node, and
> checks for dynticks-idle CPUs when starting a GP.
My concern with this one is that it requires the initialization phase
to look at a large number of rcu_data and rcu_dynticks structures, which
would give us a large latency hit. Now we -might- take this hit later
should we need to invoke force_quiescent_state(), but if the machine is
fully busy, even that won't happen.
So I have to pass on this one, sorry!
And I do have a patch that controls the latency of force_quiescent_state(),
but the people who asked for it still haven't tested it. :-/
Thanx, Paul
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 1947c4e..a85c5c4 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -666,6 +666,62 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
> __note_new_gpnum(rsp, rnp, rdp);
> }
>
> +static void rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
> + struct rcu_node *rnp, unsigned long flags);
> +
> +static inline void rcu_check_recent_offcpus(struct rcu_state *rsp,
> + struct rcu_node *rnp, unsigned long flags)
> + __releases(rnp->lock)
> +{
> +#ifdef CONFIG_SMP
> +#if defined(CONFIG_NO_HZ) || defined(CONFIG_HOTPLUG_CPU)
> + /*
> + * if it is leaf node and there are some CPU being dynticks state
> + * or offline-ing recently, we will check and report qs
> + * for these CPU when needed.
> + * (for nonleaf node, rnp->offmask is always 0)
> + */
> + if (rnp->qsmask & rnp->offmask) {
> + int cpu;
> + unsigned long bit;
> + unsigned long mask = rnp->qsmask & rnp->offmask;
> + struct rcu_data *rdp;
> + long curr, curr_nmi;
> + int off;
> +
> + cpu = rnp->grplo;
> + bit = 1;
> + for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
> + if (!(mask & bit))
> + continue;
> +
> + rdp = rsp->rda[cpu];
> + curr = rdp->dynticks->dynticks;
> + curr_nmi = rdp->dynticks->dynticks_nmi;
> +
> + off = ((curr & 0x1) == 0) && ((curr_nmi & 0x1) == 0);
> + off = off || cpu_is_offline(rdp->cpu);
> +
> + if (!off)
> + mask &= ~bit;
> + }
> +
> + rnp->offmask = mask;
> + if (mask) {
> + /*
> + * It is impossible that rcu_report_qs_rnp()
> + * calls rcu_report_qs_rsp() again. So we
> + * will not cause recursion.
> + */
> + rcu_report_qs_rnp(mask, rsp, rnp, flags);
> + return;
> + }
> + }
> +#endif
> +#endif
> + raw_spin_unlock_irqrestore(&rnp->lock, flags);
> +}
> +
> /*
> * Start a new RCU grace period if warranted, re-initializing the hierarchy
> * in preparation for detecting the next grace period. The caller must hold
> @@ -717,7 +773,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
> rnp->completed = rsp->completed;
> rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
> rcu_start_gp_per_cpu(rsp, rnp, rdp);
> - raw_spin_unlock_irqrestore(&rnp->lock, flags);
> + rcu_check_recent_offcpus(rsp, rnp, flags);
> return;
> }
>
> @@ -745,14 +801,20 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
> * irqs disabled.
> */
> rcu_for_each_node_breadth_first(rsp, rnp) {
> - raw_spin_lock(&rnp->lock); /* irqs already disabled. */
> + unsigned long tmp_flags;
> +
> + /* irqs already disabled */
> + raw_spin_lock_irqsave(&rnp->lock, tmp_flags);
> +
> rcu_preempt_check_blocked_tasks(rnp);
> rnp->qsmask = rnp->qsmaskinit;
> rnp->gpnum = rsp->gpnum;
> rnp->completed = rsp->completed;
> if (rnp == rdp->mynode)
> rcu_start_gp_per_cpu(rsp, rnp, rdp);
> - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
> +
> + rcu_check_recent_offcpus(rsp, rnp, tmp_flags);
> + /* irqs remain disabled. */
> }
>
> rnp = rcu_get_root(rsp);
> @@ -1190,8 +1252,10 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
> cpu = rnp->grplo;
> bit = 1;
> for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
> - if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
> + if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) {
> mask |= bit;
> + rnp->offmask |= bit;
> + }
> }
> if (mask != 0) {
>
> diff --git a/kernel/rcutree.h b/kernel/rcutree.h
> index 4a525a3..8ec96a7 100644
> --- a/kernel/rcutree.h
> +++ b/kernel/rcutree.h
> @@ -104,6 +104,8 @@ struct rcu_node {
> /* an rcu_data structure, otherwise, each */
> /* bit corresponds to a child rcu_node */
> /* structure. */
> + unsigned long offmask; /* Track CPUs which are dynticks state */
> + /* or offline-ing recently. */
> unsigned long expmask; /* Groups that have ->blocked_tasks[] */
> /* elements that need to drain to allow the */
> /* current expedited grace period to */
>
>
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2010-03-28 4:22 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-28 3:31 [PATCH] rcu: check dynticks idle cpu when start gp Lai Jiangshan
2010-03-28 4:22 ` Paul E. McKenney
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox