* [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous @ 2008-10-24 5:38 Lai Jiangshan 2008-10-26 21:55 ` Paul E. McKenney 0 siblings, 1 reply; 4+ messages in thread From: Lai Jiangshan @ 2008-10-24 5:38 UTC (permalink / raw) To: Paul E. McKenney, Ingo Molnar; +Cc: Linux Kernel Mailing List rcu_barrier() queues an rcu_head on every CPU, which brings a large overhead on a large system that has many CPUs. This fix reduces the system's overhead when several rcu_barrier() calls are made simultaneously. Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> --- diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ad63af8..734850b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -53,6 +53,9 @@ enum rcu_barrier { static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; +static unsigned long rcu_barrier_completed; +static unsigned long rcu_barrier_completed_bh; +static unsigned long rcu_barrier_completed_sched; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -60,7 +63,7 @@ static struct completion rcu_barrier_completion; * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ -void wakeme_after_rcu(struct rcu_head *head) +void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; @@ -113,11 +116,20 @@ static void rcu_barrier_func(void *type) * Orchestrate the specified type of RCU barrier, waiting for all * RCU callbacks of the specified type to complete. 
*/ -static void _rcu_barrier(enum rcu_barrier type) +static void _rcu_barrier(enum rcu_barrier type, unsigned long *completed) { + unsigned long batch = ACCESS_ONCE(*completed); BUG_ON(in_interrupt()); /* Take cpucontrol mutex to protect against CPU hotplug */ mutex_lock(&rcu_barrier_mutex); + + BUG_ON(*completed & 1); + if ((*completed - batch) >= 2) { + mutex_unlock(&rcu_barrier_mutex); + return; + } + (*completed)++; + init_completion(&rcu_barrier_completion); /* * Initialize rcu_barrier_cpu_count to 1, then invoke @@ -133,6 +145,7 @@ static void _rcu_barrier(enum rcu_barrier type) if (atomic_dec_and_test(&rcu_barrier_cpu_count)) complete(&rcu_barrier_completion); wait_for_completion(&rcu_barrier_completion); + (*completed)++; mutex_unlock(&rcu_barrier_mutex); } @@ -141,7 +154,7 @@ static void _rcu_barrier(enum rcu_barrier type) */ void rcu_barrier(void) { - _rcu_barrier(RCU_BARRIER_STD); + _rcu_barrier(RCU_BARRIER_STD, &rcu_barrier_completed); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -150,7 +163,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ void rcu_barrier_bh(void) { - _rcu_barrier(RCU_BARRIER_BH); + _rcu_barrier(RCU_BARRIER_BH, &rcu_barrier_completed_bh); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -159,7 +172,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); */ void rcu_barrier_sched(void) { - _rcu_barrier(RCU_BARRIER_SCHED); + _rcu_barrier(RCU_BARRIER_SCHED, &rcu_barrier_completed_sched); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous 2008-10-24 5:38 [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous Lai Jiangshan @ 2008-10-26 21:55 ` Paul E. McKenney 2008-10-27 5:49 ` Lai Jiangshan 0 siblings, 1 reply; 4+ messages in thread From: Paul E. McKenney @ 2008-10-26 21:55 UTC (permalink / raw) To: Lai Jiangshan; +Cc: Ingo Molnar, Linux Kernel Mailing List On Fri, Oct 24, 2008 at 01:38:13PM +0800, Lai Jiangshan wrote: > > rcu_barrier() queues rcu_head on all cpus, it will brings > large overhead for a large system which has a lots cpu. > this fix reduces sys's overhead when rcu_barrier()s called > simultaneous. Hello, Jiangshan, If we were to have problems with many concurrent rcu_barrier() calls stacking up, this patch looks like it would be a reasonable way of addressing those problems. But do we really have problems with this at the moment? Thanx, Paul > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> > --- > diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c > index ad63af8..734850b 100644 > --- a/kernel/rcupdate.c > +++ b/kernel/rcupdate.c > @@ -53,6 +53,9 @@ enum rcu_barrier { > > static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; > static atomic_t rcu_barrier_cpu_count; > +static unsigned long rcu_barrier_completed; > +static unsigned long rcu_barrier_completed_bh; > +static unsigned long rcu_barrier_completed_sched; > static DEFINE_MUTEX(rcu_barrier_mutex); > static struct completion rcu_barrier_completion; > > @@ -60,7 +63,7 @@ static struct completion rcu_barrier_completion; > * Awaken the corresponding synchronize_rcu() instance now that a > * grace period has elapsed. 
> */ > -void wakeme_after_rcu(struct rcu_head *head) > +void wakeme_after_rcu(struct rcu_head *head) > { > struct rcu_synchronize *rcu; > > @@ -113,11 +116,20 @@ static void rcu_barrier_func(void *type) > * Orchestrate the specified type of RCU barrier, waiting for all > * RCU callbacks of the specified type to complete. > */ > -static void _rcu_barrier(enum rcu_barrier type) > +static void _rcu_barrier(enum rcu_barrier type, unsigned long *completed) > { > + unsigned long batch = ACCESS_ONCE(*completed); > BUG_ON(in_interrupt()); > /* Take cpucontrol mutex to protect against CPU hotplug */ > mutex_lock(&rcu_barrier_mutex); > + > + BUG_ON(*completed & 1); > + if ((*completed - batch) >= 2) { > + mutex_unlock(&rcu_barrier_mutex); > + return; > + } > + (*completed)++; > + > init_completion(&rcu_barrier_completion); > /* > * Initialize rcu_barrier_cpu_count to 1, then invoke > @@ -133,6 +145,7 @@ static void _rcu_barrier(enum rcu_barrier type) > if (atomic_dec_and_test(&rcu_barrier_cpu_count)) > complete(&rcu_barrier_completion); > wait_for_completion(&rcu_barrier_completion); > + (*completed)++; > mutex_unlock(&rcu_barrier_mutex); > } > > @@ -141,7 +154,7 @@ static void _rcu_barrier(enum rcu_barrier type) > */ > void rcu_barrier(void) > { > - _rcu_barrier(RCU_BARRIER_STD); > + _rcu_barrier(RCU_BARRIER_STD, &rcu_barrier_completed); > } > EXPORT_SYMBOL_GPL(rcu_barrier); > > @@ -150,7 +163,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); > */ > void rcu_barrier_bh(void) > { > - _rcu_barrier(RCU_BARRIER_BH); > + _rcu_barrier(RCU_BARRIER_BH, &rcu_barrier_completed_bh); > } > EXPORT_SYMBOL_GPL(rcu_barrier_bh); > > @@ -159,7 +172,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); > */ > void rcu_barrier_sched(void) > { > - _rcu_barrier(RCU_BARRIER_SCHED); > + _rcu_barrier(RCU_BARRIER_SCHED, &rcu_barrier_completed_sched); > } > EXPORT_SYMBOL_GPL(rcu_barrier_sched); > > > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous 2008-10-26 21:55 ` Paul E. McKenney @ 2008-10-27 5:49 ` Lai Jiangshan 2008-10-27 22:34 ` Paul E. McKenney 0 siblings, 1 reply; 4+ messages in thread From: Lai Jiangshan @ 2008-10-27 5:49 UTC (permalink / raw) To: paulmck; +Cc: Ingo Molnar, Linux Kernel Mailing List Paul E. McKenney wrote: > On Fri, Oct 24, 2008 at 01:38:13PM +0800, Lai Jiangshan wrote: >> rcu_barrier() queues rcu_head on all cpus, it will brings >> large overhead for a large system which has a lots cpu. >> this fix reduces sys's overhead when rcu_barrier()s called >> simultaneous. > > Hello, Jiangshan, > > If we were to have problems with many concurrent rcu_barrier() > calls stacking up, this patch looks like it would be a reasonable > was of addressing those problems. > > But do we really have problems with this at the moment? > > Thanx, Paul Hi, Paul, Thanks, we do not have problems with this at the moment. I suddenly had an association of ideas to synchronize_srcu(), so I made use of the ideas in synchronize_srcu() and this patch was made. Thanx, Lai. > >> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> >> --- >> diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c >> index ad63af8..734850b 100644 >> --- a/kernel/rcupdate.c >> +++ b/kernel/rcupdate.c >> @@ -53,6 +53,9 @@ enum rcu_barrier { >> >> static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; >> static atomic_t rcu_barrier_cpu_count; >> +static unsigned long rcu_barrier_completed; >> +static unsigned long rcu_barrier_completed_bh; >> +static unsigned long rcu_barrier_completed_sched; >> static DEFINE_MUTEX(rcu_barrier_mutex); >> static struct completion rcu_barrier_completion; >> >> @@ -60,7 +63,7 @@ static struct completion rcu_barrier_completion; >> * Awaken the corresponding synchronize_rcu() instance now that a >> * grace period has elapsed. 
>> */ >> -void wakeme_after_rcu(struct rcu_head *head) >> +void wakeme_after_rcu(struct rcu_head *head) >> { >> struct rcu_synchronize *rcu; >> >> @@ -113,11 +116,20 @@ static void rcu_barrier_func(void *type) >> * Orchestrate the specified type of RCU barrier, waiting for all >> * RCU callbacks of the specified type to complete. >> */ >> -static void _rcu_barrier(enum rcu_barrier type) >> +static void _rcu_barrier(enum rcu_barrier type, unsigned long *completed) >> { >> + unsigned long batch = ACCESS_ONCE(*completed); >> BUG_ON(in_interrupt()); >> /* Take cpucontrol mutex to protect against CPU hotplug */ >> mutex_lock(&rcu_barrier_mutex); >> + >> + BUG_ON(*completed & 1); >> + if ((*completed - batch) >= 2) { >> + mutex_unlock(&rcu_barrier_mutex); >> + return; >> + } >> + (*completed)++; >> + >> init_completion(&rcu_barrier_completion); >> /* >> * Initialize rcu_barrier_cpu_count to 1, then invoke >> @@ -133,6 +145,7 @@ static void _rcu_barrier(enum rcu_barrier type) >> if (atomic_dec_and_test(&rcu_barrier_cpu_count)) >> complete(&rcu_barrier_completion); >> wait_for_completion(&rcu_barrier_completion); >> + (*completed)++; >> mutex_unlock(&rcu_barrier_mutex); >> } >> >> @@ -141,7 +154,7 @@ static void _rcu_barrier(enum rcu_barrier type) >> */ >> void rcu_barrier(void) >> { >> - _rcu_barrier(RCU_BARRIER_STD); >> + _rcu_barrier(RCU_BARRIER_STD, &rcu_barrier_completed); >> } >> EXPORT_SYMBOL_GPL(rcu_barrier); >> >> @@ -150,7 +163,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); >> */ >> void rcu_barrier_bh(void) >> { >> - _rcu_barrier(RCU_BARRIER_BH); >> + _rcu_barrier(RCU_BARRIER_BH, &rcu_barrier_completed_bh); >> } >> EXPORT_SYMBOL_GPL(rcu_barrier_bh); >> >> @@ -159,7 +172,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); >> */ >> void rcu_barrier_sched(void) >> { >> - _rcu_barrier(RCU_BARRIER_SCHED); >> + _rcu_barrier(RCU_BARRIER_SCHED, &rcu_barrier_completed_sched); >> } >> EXPORT_SYMBOL_GPL(rcu_barrier_sched); >> >> >> > > > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous 2008-10-27 5:49 ` Lai Jiangshan @ 2008-10-27 22:34 ` Paul E. McKenney 0 siblings, 0 replies; 4+ messages in thread From: Paul E. McKenney @ 2008-10-27 22:34 UTC (permalink / raw) To: Lai Jiangshan; +Cc: Ingo Molnar, Linux Kernel Mailing List On Mon, Oct 27, 2008 at 01:49:49PM +0800, Lai Jiangshan wrote: > Paul E. McKenney wrote: > > On Fri, Oct 24, 2008 at 01:38:13PM +0800, Lai Jiangshan wrote: > >> rcu_barrier() queues rcu_head on all cpus, it will brings > >> large overhead for a large system which has a lots cpu. > >> this fix reduces sys's overhead when rcu_barrier()s called > >> simultaneous. > > > > Hello, Jiangshan, > > > > If we were to have problems with many concurrent rcu_barrier() > > calls stacking up, this patch looks like it would be a reasonable > > was of addressing those problems. > > > > But do we really have problems with this at the moment? > > > > Thanx, Paul > > Hi, Paul, > > Thanks, we do not have problems with this at the moment. > > I suddenly had an association of ideas to synchronize_srcu(), > so I made use of the ideas in synchronize_srcu() and this patch was made. Absolutely nothing wrong with that! ;-) We should make sure that this patch is kept somewhere so that if problems arise, it can be brought to bear quickly and easily. One approach would be to keep it in a git tree, but that would likely incur merge overhead. Another approach would be to have an RCU web page maintaining patches such as this one. Thoughts? Thanx, Paul > Thanx, Lai. 
> > > > > >> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> > >> --- > >> diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c > >> index ad63af8..734850b 100644 > >> --- a/kernel/rcupdate.c > >> +++ b/kernel/rcupdate.c > >> @@ -53,6 +53,9 @@ enum rcu_barrier { > >> > >> static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; > >> static atomic_t rcu_barrier_cpu_count; > >> +static unsigned long rcu_barrier_completed; > >> +static unsigned long rcu_barrier_completed_bh; > >> +static unsigned long rcu_barrier_completed_sched; > >> static DEFINE_MUTEX(rcu_barrier_mutex); > >> static struct completion rcu_barrier_completion; > >> > >> @@ -60,7 +63,7 @@ static struct completion rcu_barrier_completion; > >> * Awaken the corresponding synchronize_rcu() instance now that a > >> * grace period has elapsed. > >> */ > >> -void wakeme_after_rcu(struct rcu_head *head) > >> +void wakeme_after_rcu(struct rcu_head *head) > >> { > >> struct rcu_synchronize *rcu; > >> > >> @@ -113,11 +116,20 @@ static void rcu_barrier_func(void *type) > >> * Orchestrate the specified type of RCU barrier, waiting for all > >> * RCU callbacks of the specified type to complete. 
> >> */ > >> -static void _rcu_barrier(enum rcu_barrier type) > >> +static void _rcu_barrier(enum rcu_barrier type, unsigned long *completed) > >> { > >> + unsigned long batch = ACCESS_ONCE(*completed); > >> BUG_ON(in_interrupt()); > >> /* Take cpucontrol mutex to protect against CPU hotplug */ > >> mutex_lock(&rcu_barrier_mutex); > >> + > >> + BUG_ON(*completed & 1); > >> + if ((*completed - batch) >= 2) { > >> + mutex_unlock(&rcu_barrier_mutex); > >> + return; > >> + } > >> + (*completed)++; > >> + > >> init_completion(&rcu_barrier_completion); > >> /* > >> * Initialize rcu_barrier_cpu_count to 1, then invoke > >> @@ -133,6 +145,7 @@ static void _rcu_barrier(enum rcu_barrier type) > >> if (atomic_dec_and_test(&rcu_barrier_cpu_count)) > >> complete(&rcu_barrier_completion); > >> wait_for_completion(&rcu_barrier_completion); > >> + (*completed)++; > >> mutex_unlock(&rcu_barrier_mutex); > >> } > >> > >> @@ -141,7 +154,7 @@ static void _rcu_barrier(enum rcu_barrier type) > >> */ > >> void rcu_barrier(void) > >> { > >> - _rcu_barrier(RCU_BARRIER_STD); > >> + _rcu_barrier(RCU_BARRIER_STD, &rcu_barrier_completed); > >> } > >> EXPORT_SYMBOL_GPL(rcu_barrier); > >> > >> @@ -150,7 +163,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); > >> */ > >> void rcu_barrier_bh(void) > >> { > >> - _rcu_barrier(RCU_BARRIER_BH); > >> + _rcu_barrier(RCU_BARRIER_BH, &rcu_barrier_completed_bh); > >> } > >> EXPORT_SYMBOL_GPL(rcu_barrier_bh); > >> > >> @@ -159,7 +172,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); > >> */ > >> void rcu_barrier_sched(void) > >> { > >> - _rcu_barrier(RCU_BARRIER_SCHED); > >> + _rcu_barrier(RCU_BARRIER_SCHED, &rcu_barrier_completed_sched); > >> } > >> EXPORT_SYMBOL_GPL(rcu_barrier_sched); > >> > >> > >> > > > > > > > > ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2008-10-27 22:34 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-10-24 5:38 [PATCH] rcupdate: reduce sys's overhead when rcu_barrier()s called simultaneous Lai Jiangshan 2008-10-26 21:55 ` Paul E. McKenney 2008-10-27 5:49 ` Lai Jiangshan 2008-10-27 22:34 ` Paul E. McKenney
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.