* [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 @ 2008-11-06 6:47 Lai Jiangshan 2008-11-06 6:57 ` Ingo Molnar 2008-11-09 0:51 ` Paul E. McKenney 0 siblings, 2 replies; 16+ messages in thread From: Lai Jiangshan @ 2008-11-06 6:47 UTC (permalink / raw) To: Ingo Molnar Cc: Paul E. McKenney, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List this fix remove ugly macro, and increase readability for rcupdate codes changed from v1: use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of synchronize_sched(). Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> --- diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 5f89b62..68e84ff 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -32,6 +32,7 @@ #ifndef __LINUX_RCUCLASSIC_H #define __LINUX_RCUCLASSIC_H +#define HAVE_SPECIAL_RCU_BH #include <linux/cache.h> #include <linux/spinlock.h> @@ -166,12 +167,7 @@ extern struct lockdep_map rcu_lock_map; local_bh_enable(); \ } while (0) -#define __synchronize_sched() synchronize_rcu() - -#define call_rcu_sched(head, func) call_rcu(head, func) - extern void __rcu_init(void); -#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e..7861bee 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -112,6 +112,7 @@ struct rcu_head { */ #define rcu_read_unlock() __rcu_read_unlock() +#ifdef HAVE_SPECIAL_RCU_BH /** * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section * @@ -125,13 +126,19 @@ struct rcu_head { */ #define rcu_read_lock_bh() __rcu_read_lock_bh() -/* +/** * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ #define rcu_read_unlock_bh() __rcu_read_unlock_bh() +#else +#define rcu_bh_qsctr_inc(cpu) +#define rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } +#define rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } +#endif /* HAVE_SPECIAL_RCU_BH */ + /** * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section * @@ -189,45 +196,6 @@ struct rcu_head { (p) = (v); \ }) -/* Infrastructure to implement the synchronize_() primitives. */ - -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -extern void wakeme_after_rcu(struct rcu_head *head); - -#define synchronize_rcu_xxx(name, func) \ -void name(void) \ -{ \ - struct rcu_synchronize rcu; \ - \ - init_completion(&rcu.completion); \ - /* Will wake me after RCU finished. */ \ - func(&rcu.head, wakeme_after_rcu); \ - /* Wait for it. */ \ - wait_for_completion(&rcu.completion); \ -} - -/** - * synchronize_sched - block until all CPUs have exited any non-preemptive - * kernel code sequences. - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -#define synchronize_sched() __synchronize_sched() - /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -242,6 +210,11 @@ void name(void) \ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); + +extern void synchronize_rcu(void); +extern void rcu_barrier(void); + +#ifdef HAVE_SPECIAL_RCU_BH /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -263,11 +236,46 @@ extern void call_rcu(struct rcu_head *head, extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); -/* Exported common interfaces */ -extern void synchronize_rcu(void); -extern void rcu_barrier(void); extern void rcu_barrier_bh(void); +#else + +#define call_rcu_bh call_rcu +#define rcu_barrier_bh rcu_barrier + +/* + * Return the number of RCU batches processed thus far. Useful for debug + * and statistic. The _bh variant is identifcal to straight RCU + */ +static inline long rcu_batches_completed_bh(void) +{ + return rcu_batches_completed(); +} +#endif /* HAVE_SPECIAL_RCU_BH */ + +#ifdef HAVE_SPECIAL_RCU_SCHED +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. + */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +extern void synchronize_sched(void); extern void rcu_barrier_sched(void); +#else +#define call_rcu_sched call_rcu +#define synchronize_sched synchronize_rcu +#define rcu_barrier_sched rcu_barrier + +#define rcu_init_sched() do { } while (0) +#endif /* HAVE_SPECIAL_RCU_SCHED */ /* Internal to kernel */ extern void rcu_init(void); diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 3e05c09..f30d1c8 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -32,6 +32,7 @@ #ifndef __LINUX_RCUPREEMPT_H #define __LINUX_RCUPREEMPT_H +#define HAVE_SPECIAL_RCU_SCHED #include <linux/cache.h> #include <linux/spinlock.h> @@ -56,54 +57,18 @@ static inline void rcu_qsctr_inc(int cpu) rdssp->sched_qs++; } -#define rcu_bh_qsctr_inc(cpu) - -/* - * Someone might want to pass call_rcu_bh as a function pointer. - * So this needs to just be a rename and not a macro function. - * (no parentheses) - */ -#define call_rcu_bh call_rcu - -/** - * call_rcu_sched - Queue RCU callback for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full - * synchronize_sched()-style grace period elapses, in other words after - * all currently executing preempt-disabled sections of code (including - * hardirq handlers, NMI handlers, and local_irq_save() blocks) have - * completed. - */ -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *head)); extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); -#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } -#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } - -extern void __synchronize_sched(void); - extern void __rcu_init(void); extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); -/* - * Return the number of RCU batches processed thus far. Useful for debug - * and statistic. The _bh variant is identifcal to straight RCU - */ -static inline long rcu_batches_completed_bh(void) -{ - return rcu_batches_completed(); -} - #ifdef CONFIG_RCU_TRACE struct rcupreempt_trace; extern long *rcupreempt_flipctr(int cpu); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ad63af8..70cff32 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -56,11 +56,16 @@ static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ -void wakeme_after_rcu(struct rcu_head *head) +static void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; @@ -77,10 +82,48 @@ void wakeme_after_rcu(struct rcu_head *head) * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ -void synchronize_rcu(void); /* Makes kernel-doc tools happy */ -synchronize_rcu_xxx(synchronize_rcu, call_rcu) +void synchronize_rcu(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} EXPORT_SYMBOL_GPL(synchronize_rcu); +#ifdef HAVE_SPECIAL_RCU_SCHED +/** + * synchronize_sched - block until all CPUs have exited any non-preemptive + * kernel code sequences. + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +void synchronize_sched(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_sched); +#endif /* HAVE_SPECIAL_RCU_SCHED */ + static void rcu_barrier_callback(struct rcu_head *notused) { if (atomic_dec_and_test(&rcu_barrier_cpu_count)) @@ -145,6 +188,7 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); +#ifdef HAVE_SPECIAL_RCU_BH /** * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. */ @@ -153,7 +197,9 @@ void rcu_barrier_bh(void) _rcu_barrier(RCU_BARRIER_BH); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); +#endif /* HAVE_SPECIAL_RCU_BH */ +#ifdef HAVE_SPECIAL_RCU_SCHED /** * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. */ @@ -162,6 +208,7 @@ void rcu_barrier_sched(void) _rcu_barrier(RCU_BARRIER_SCHED); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); +#endif /* HAVE_SPECIAL_RCU_SCHED */ void __init rcu_init(void) { diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 59236e8..2068ad9 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1161,15 +1161,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) EXPORT_SYMBOL_GPL(call_rcu_sched); /* - * Wait until all currently running preempt_disable() code segments - * (including hardware-irq-disable segments) complete. Note that - * in -rt this does -not- necessarily result in all currently executing - * interrupt -handlers- having completed. - */ -synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) -EXPORT_SYMBOL_GPL(__synchronize_sched); - -/* * kthread function that manages call_rcu_sched grace periods. */ static int rcu_sched_grace_period(void *arg) ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-06 6:47 [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 Lai Jiangshan @ 2008-11-06 6:57 ` Ingo Molnar 2008-11-09 0:51 ` Paul E. McKenney 1 sibling, 0 replies; 16+ messages in thread From: Ingo Molnar @ 2008-11-06 6:57 UTC (permalink / raw) To: Lai Jiangshan Cc: Paul E. McKenney, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List * Lai Jiangshan <laijs@cn.fujitsu.com> wrote: > this fix remove ugly macro, and increase readability for rcupdate codes looks good to me, if Paul acks the concept too. Two small details: > +++ b/include/linux/rcuclassic.h > @@ -32,6 +32,7 @@ > > #ifndef __LINUX_RCUCLASSIC_H > #define __LINUX_RCUCLASSIC_H > +#define HAVE_SPECIAL_RCU_BH please use def_bool to define CONFIG_RCU_HAVE_SPECIAL_RCU_BH and: > +#else > +#define rcu_bh_qsctr_inc(cpu) > +#define rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > +#define rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > +#endif /* HAVE_SPECIAL_RCU_BH */ use inline functions please. CPP defines should never be used in new code. (use inlines instead of macros and enums/const instead of constant #define's) Ingo ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-06 6:47 [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 Lai Jiangshan 2008-11-06 6:57 ` Ingo Molnar @ 2008-11-09 0:51 ` Paul E. McKenney 2008-11-10 3:22 ` Lai Jiangshan 1 sibling, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-09 0:51 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Thu, Nov 06, 2008 at 02:47:44PM +0800, Lai Jiangshan wrote: > > this fix remove ugly macro, and increase readability for rcupdate codes > > changed from v1: > use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of > synchronize_sched(). Hello, Jiangshan! I very much like getting rid of the ugly macro. I of course like the kernel-doc fixes. ;-) I am not yet convinced of the HAVE_SPECIAL_RCU_BH and HAVE_SPECIAL_RCU_SCHED pieces. It is not clear to me that this approach is simpler than the current approach of simply providing the appropriate definitions for the symbols in the implementation-specific rcuxxx.h file. Am I missing something? Thanx, Paul > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> > --- > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h > index 5f89b62..68e84ff 100644 > --- a/include/linux/rcuclassic.h > +++ b/include/linux/rcuclassic.h > @@ -32,6 +32,7 @@ > > #ifndef __LINUX_RCUCLASSIC_H > #define __LINUX_RCUCLASSIC_H > +#define HAVE_SPECIAL_RCU_BH > > #include <linux/cache.h> > #include <linux/spinlock.h> > @@ -166,12 +167,7 @@ extern struct lockdep_map rcu_lock_map; > local_bh_enable(); \ > } while (0) > > -#define __synchronize_sched() synchronize_rcu() > - > -#define call_rcu_sched(head, func) call_rcu(head, func) > - > extern void __rcu_init(void); > -#define rcu_init_sched() do { } while (0) > extern void rcu_check_callbacks(int cpu, int user); > extern void rcu_restart_cpu(int cpu); > > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h > index 86f1f5e..7861bee 100644 > --- a/include/linux/rcupdate.h > +++ b/include/linux/rcupdate.h > @@ -112,6 +112,7 @@ struct rcu_head { > */ > #define rcu_read_unlock() __rcu_read_unlock() > > +#ifdef HAVE_SPECIAL_RCU_BH > /** > * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section > * > @@ -125,13 +126,19 @@ struct rcu_head { > */ > #define rcu_read_lock_bh() __rcu_read_lock_bh() > > -/* > +/** > * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section > * > * See rcu_read_lock_bh() for more information. > */ > #define rcu_read_unlock_bh() __rcu_read_unlock_bh() > > +#else > +#define rcu_bh_qsctr_inc(cpu) > +#define rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > +#define rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > +#endif /* HAVE_SPECIAL_RCU_BH */ > + > /** > * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section > * > @@ -189,45 +196,6 @@ struct rcu_head { > (p) = (v); \ > }) > > -/* Infrastructure to implement the synchronize_() primitives. */ > - > -struct rcu_synchronize { > - struct rcu_head head; > - struct completion completion; > -}; > - > -extern void wakeme_after_rcu(struct rcu_head *head); > - > -#define synchronize_rcu_xxx(name, func) \ > -void name(void) \ > -{ \ > - struct rcu_synchronize rcu; \ > - \ > - init_completion(&rcu.completion); \ > - /* Will wake me after RCU finished. */ \ > - func(&rcu.head, wakeme_after_rcu); \ > - /* Wait for it. */ \ > - wait_for_completion(&rcu.completion); \ > -} > - > -/** > - * synchronize_sched - block until all CPUs have exited any non-preemptive > - * kernel code sequences. > - * > - * This means that all preempt_disable code sequences, including NMI and > - * hardware-interrupt handlers, in progress on entry will have completed > - * before this primitive returns. However, this does not guarantee that > - * softirq handlers will have completed, since in some kernels, these > - * handlers can run in process context, and can block. > - * > - * This primitive provides the guarantees made by the (now removed) > - * synchronize_kernel() API. In contrast, synchronize_rcu() only > - * guarantees that rcu_read_lock() sections will have completed. > - * In "classic RCU", these two guarantees happen to be one and > - * the same, but can differ in realtime RCU implementations. > - */ > -#define synchronize_sched() __synchronize_sched() > - > /** > * call_rcu - Queue an RCU callback for invocation after a grace period. > * @head: structure to be used for queueing the RCU updates. > @@ -242,6 +210,11 @@ void name(void) \ > extern void call_rcu(struct rcu_head *head, > void (*func)(struct rcu_head *head)); > > + > +extern void synchronize_rcu(void); > +extern void rcu_barrier(void); > + > +#ifdef HAVE_SPECIAL_RCU_BH > /** > * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. > * @head: structure to be used for queueing the RCU updates. > @@ -263,11 +236,46 @@ extern void call_rcu(struct rcu_head *head, > extern void call_rcu_bh(struct rcu_head *head, > void (*func)(struct rcu_head *head)); > > -/* Exported common interfaces */ > -extern void synchronize_rcu(void); > -extern void rcu_barrier(void); > extern void rcu_barrier_bh(void); > +#else > + > +#define call_rcu_bh call_rcu > +#define rcu_barrier_bh rcu_barrier > + > +/* > + * Return the number of RCU batches processed thus far. Useful for debug > + * and statistic. The _bh variant is identifcal to straight RCU > + */ > +static inline long rcu_batches_completed_bh(void) > +{ > + return rcu_batches_completed(); > +} > +#endif /* HAVE_SPECIAL_RCU_BH */ > + > +#ifdef HAVE_SPECIAL_RCU_SCHED > +/** > + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. > + * @head: structure to be used for queueing the RCU updates. > + * @func: actual update function to be invoked after the grace period > + * > + * The update function will be invoked some time after a full > + * synchronize_sched()-style grace period elapses, in other words after > + * all currently executing preempt-disabled sections of code (including > + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have > + * completed. > + */ > +extern void call_rcu_sched(struct rcu_head *head, > + void (*func)(struct rcu_head *head)); > + > +extern void synchronize_sched(void); > extern void rcu_barrier_sched(void); > +#else > +#define call_rcu_sched call_rcu > +#define synchronize_sched synchronize_rcu > +#define rcu_barrier_sched rcu_barrier > + > +#define rcu_init_sched() do { } while (0) > +#endif /* HAVE_SPECIAL_RCU_SCHED */ > > /* Internal to kernel */ > extern void rcu_init(void); > diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h > index 3e05c09..f30d1c8 100644 > --- a/include/linux/rcupreempt.h > +++ b/include/linux/rcupreempt.h > @@ -32,6 +32,7 @@ > > #ifndef __LINUX_RCUPREEMPT_H > #define __LINUX_RCUPREEMPT_H > +#define HAVE_SPECIAL_RCU_SCHED > > #include <linux/cache.h> > #include <linux/spinlock.h> > @@ -56,54 +57,18 @@ static inline void rcu_qsctr_inc(int cpu) > > rdssp->sched_qs++; > } > -#define rcu_bh_qsctr_inc(cpu) > - > -/* > - * Someone might want to pass call_rcu_bh as a function pointer. > - * So this needs to just be a rename and not a macro function. > - * (no parentheses) > - */ > -#define call_rcu_bh call_rcu > - > -/** > - * call_rcu_sched - Queue RCU callback for invocation after sched grace period. > - * @head: structure to be used for queueing the RCU updates. > - * @func: actual update function to be invoked after the grace period > - * > - * The update function will be invoked some time after a full > - * synchronize_sched()-style grace period elapses, in other words after > - * all currently executing preempt-disabled sections of code (including > - * hardirq handlers, NMI handlers, and local_irq_save() blocks) have > - * completed. > - */ > -extern void call_rcu_sched(struct rcu_head *head, > - void (*func)(struct rcu_head *head)); > > extern void __rcu_read_lock(void) __acquires(RCU); > extern void __rcu_read_unlock(void) __releases(RCU); > extern int rcu_pending(int cpu); > extern int rcu_needs_cpu(int cpu); > > -#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > -#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > - > -extern void __synchronize_sched(void); > - > extern void __rcu_init(void); > extern void rcu_init_sched(void); > extern void rcu_check_callbacks(int cpu, int user); > extern void rcu_restart_cpu(int cpu); > extern long rcu_batches_completed(void); > > -/* > - * Return the number of RCU batches processed thus far. Useful for debug > - * and statistic. The _bh variant is identifcal to straight RCU > - */ > -static inline long rcu_batches_completed_bh(void) > -{ > - return rcu_batches_completed(); > -} > - > #ifdef CONFIG_RCU_TRACE > struct rcupreempt_trace; > extern long *rcupreempt_flipctr(int cpu); > diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c > index ad63af8..70cff32 100644 > --- a/kernel/rcupdate.c > +++ b/kernel/rcupdate.c > @@ -56,11 +56,16 @@ static atomic_t rcu_barrier_cpu_count; > static DEFINE_MUTEX(rcu_barrier_mutex); > static struct completion rcu_barrier_completion; > > +struct rcu_synchronize { > + struct rcu_head head; > + struct completion completion; > +}; > + > /* > * Awaken the corresponding synchronize_rcu() instance now that a > * grace period has elapsed. > */ > -void wakeme_after_rcu(struct rcu_head *head) > +static void wakeme_after_rcu(struct rcu_head *head) > { > struct rcu_synchronize *rcu; > > @@ -77,10 +82,48 @@ void wakeme_after_rcu(struct rcu_head *head) > * sections are delimited by rcu_read_lock() and rcu_read_unlock(), > * and may be nested. > */ > -void synchronize_rcu(void); /* Makes kernel-doc tools happy */ > -synchronize_rcu_xxx(synchronize_rcu, call_rcu) > +void synchronize_rcu(void) > +{ > + struct rcu_synchronize rcu; > + > + init_completion(&rcu.completion); > + /* Will wake me after RCU finished. */ > + call_rcu(&rcu.head, wakeme_after_rcu); > + /* Wait for it. */ > + wait_for_completion(&rcu.completion); > +} > EXPORT_SYMBOL_GPL(synchronize_rcu); > > +#ifdef HAVE_SPECIAL_RCU_SCHED > +/** > + * synchronize_sched - block until all CPUs have exited any non-preemptive > + * kernel code sequences. > + * > + * This means that all preempt_disable code sequences, including NMI and > + * hardware-interrupt handlers, in progress on entry will have completed > + * before this primitive returns. However, this does not guarantee that > + * softirq handlers will have completed, since in some kernels, these > + * handlers can run in process context, and can block. > + * > + * This primitive provides the guarantees made by the (now removed) > + * synchronize_kernel() API. In contrast, synchronize_rcu() only > + * guarantees that rcu_read_lock() sections will have completed. > + * In "classic RCU", these two guarantees happen to be one and > + * the same, but can differ in realtime RCU implementations. > + */ > +void synchronize_sched(void) > +{ > + struct rcu_synchronize rcu; > + > + init_completion(&rcu.completion); > + /* Will wake me after RCU finished. */ > + call_rcu_sched(&rcu.head, wakeme_after_rcu); > + /* Wait for it. */ > + wait_for_completion(&rcu.completion); > +} > +EXPORT_SYMBOL_GPL(synchronize_sched); > +#endif /* HAVE_SPECIAL_RCU_SCHED */ > + > static void rcu_barrier_callback(struct rcu_head *notused) > { > if (atomic_dec_and_test(&rcu_barrier_cpu_count)) > @@ -145,6 +188,7 @@ void rcu_barrier(void) > } > EXPORT_SYMBOL_GPL(rcu_barrier); > > +#ifdef HAVE_SPECIAL_RCU_BH > /** > * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. > */ > @@ -153,7 +197,9 @@ void rcu_barrier_bh(void) > _rcu_barrier(RCU_BARRIER_BH); > } > EXPORT_SYMBOL_GPL(rcu_barrier_bh); > +#endif /* HAVE_SPECIAL_RCU_BH */ > > +#ifdef HAVE_SPECIAL_RCU_SCHED > /** > * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. > */ > @@ -162,6 +208,7 @@ void rcu_barrier_sched(void) > _rcu_barrier(RCU_BARRIER_SCHED); > } > EXPORT_SYMBOL_GPL(rcu_barrier_sched); > +#endif /* HAVE_SPECIAL_RCU_SCHED */ > > void __init rcu_init(void) > { > diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c > index 59236e8..2068ad9 100644 > --- a/kernel/rcupreempt.c > +++ b/kernel/rcupreempt.c > @@ -1161,15 +1161,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) > EXPORT_SYMBOL_GPL(call_rcu_sched); > > /* > - * Wait until all currently running preempt_disable() code segments > - * (including hardware-irq-disable segments) complete. Note that > - * in -rt this does -not- necessarily result in all currently executing > - * interrupt -handlers- having completed. > - */ > -synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) > -EXPORT_SYMBOL_GPL(__synchronize_sched); > - > -/* > * kthread function that manages call_rcu_sched grace periods. > */ > static int rcu_sched_grace_period(void *arg) > > ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-09 0:51 ` Paul E. McKenney @ 2008-11-10 3:22 ` Lai Jiangshan 2008-11-10 18:45 ` Paul E. McKenney 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-10 3:22 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List Paul E. McKenney wrote: > On Thu, Nov 06, 2008 at 02:47:44PM +0800, Lai Jiangshan wrote: >> this fix remove ugly macro, and increase readability for rcupdate codes >> >> changed from v1: >> use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of >> synchronize_sched(). > > Hello, Jiangshan! > > I very much like getting rid of the ugly macro. I of course like the > kernel-doc fixes. ;-) > > I am not yet convinced of the HAVE_SPECIAL_RCU_BH and > HAVE_SPECIAL_RCU_SCHED pieces. It is not clear to me that this approach > is simpler than the current approach of simply providing the appropriate > definitions for the symbols in the implementation-specific rcuxxx.h > file. > > Am I missing something? > > Thanx, Paul > I think: RCU_BH is not required, we can used RCU instead. so HAVE_SPECIAL_RCU_BH will help for implementation which has not RCU_BH. HAVE_SPECIAL_RCU_SCHED is a little different, RCU and RCU_SCHED are both required for the kernel. But I think, in an implementation, if rcu_read_lock_sched() implies rcu_read_lock(), we may not need implement RCU_SCHED too(sometimes we may implement RCU_SCHED for performance). so HAVE_SPECIAL_RCU_SCHED will help. Thanx, Lai. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-10 3:22 ` Lai Jiangshan @ 2008-11-10 18:45 ` Paul E. McKenney 2008-11-11 0:55 ` Lai Jiangshan 0 siblings, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-10 18:45 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Mon, Nov 10, 2008 at 11:22:15AM +0800, Lai Jiangshan wrote: > Paul E. McKenney wrote: > > On Thu, Nov 06, 2008 at 02:47:44PM +0800, Lai Jiangshan wrote: > >> this fix remove ugly macro, and increase readability for rcupdate codes > >> > >> changed from v1: > >> use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of > >> synchronize_sched(). > > > > Hello, Jiangshan! > > > > I very much like getting rid of the ugly macro. I of course like the > > kernel-doc fixes. ;-) > > > > I am not yet convinced of the HAVE_SPECIAL_RCU_BH and > > HAVE_SPECIAL_RCU_SCHED pieces. It is not clear to me that this approach > > is simpler than the current approach of simply providing the appropriate > > definitions for the symbols in the implementation-specific rcuxxx.h > > file. > > > > Am I missing something? > > > > Thanx, Paul > > > > I think: > > RCU_BH is not required, we can used RCU instead. so HAVE_SPECIAL_RCU_BH > will help for implementation which has not RCU_BH. > > HAVE_SPECIAL_RCU_SCHED is a little different, RCU and RCU_SCHED are both > required for the kernel. But I think, in an implementation, > if rcu_read_lock_sched() implies rcu_read_lock(), we may not need implement > RCU_SCHED too(sometimes we may implement RCU_SCHED for performance). > so HAVE_SPECIAL_RCU_SCHED will help. If I understand correctly, this is the "old way": ------------------------------------------------------------------------ rcupdate.h: #define rcu_read_lock_bh() __rcu_read_lock_bh() #define rcu_read_unlock_bh() __rcu_read_unlock_bh() rcupreempt.h: #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } ------------------------------------------------------------------------ And then this is the "new way": ------------------------------------------------------------------------ rcupdate.h: #ifdef HAVE_SPECIAL_RCU_BH #define rcu_read_lock_bh() __rcu_read_lock_bh() #define rcu_read_unlock_bh() __rcu_read_unlock_bh() #else #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } #endif /* HAVE_SPECIAL_RCU_BH */ rcupreempt.h: #define HAVE_SPECIAL_RCU_BH ------------------------------------------------------------------------ If we had ten different RCU implementations, then the "new way" would save a little bit of code. But the "old way" is a bit easier to figure out. So I am in favor of getting rid of the ugly macro, and also in favor of fixing the kerneldoc, but opposed to the HAVE_SPECIAL_RCU_BH and HAVE_SPECIAL_RCU_SCHED changes. Or am I missing something? Thanx, Paul ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-10 18:45 ` Paul E. McKenney @ 2008-11-11 0:55 ` Lai Jiangshan 2008-11-11 1:03 ` Paul E. McKenney 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-11 0:55 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List Paul E. McKenney wrote: > On Mon, Nov 10, 2008 at 11:22:15AM +0800, Lai Jiangshan wrote: >> Paul E. McKenney wrote: >>> On Thu, Nov 06, 2008 at 02:47:44PM +0800, Lai Jiangshan wrote: >>>> this fix remove ugly macro, and increase readability for rcupdate codes >>>> >>>> changed from v1: >>>> use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of >>>> synchronize_sched(). >>> Hello, Jiangshan! >>> >>> I very much like getting rid of the ugly macro. I of course like the >>> kernel-doc fixes. ;-) >>> >>> I am not yet convinced of the HAVE_SPECIAL_RCU_BH and >>> HAVE_SPECIAL_RCU_SCHED pieces. It is not clear to me that this approach >>> is simpler than the current approach of simply providing the appropriate >>> definitions for the symbols in the implementation-specific rcuxxx.h >>> file. >>> >>> Am I missing something? >>> >>> Thanx, Paul >>> >> I think: >> >> RCU_BH is not required, we can used RCU instead. so HAVE_SPECIAL_RCU_BH >> will help for implementation which has not RCU_BH. >> >> HAVE_SPECIAL_RCU_SCHED is a little different, RCU and RCU_SCHED are both >> required for the kernel. But I think, in an implementation, >> if rcu_read_lock_sched() implies rcu_read_lock(), we may not need implement >> RCU_SCHED too(sometimes we may implement RCU_SCHED for performance). >> so HAVE_SPECIAL_RCU_SCHED will help. > > If I understand correctly, this is the "old way": > > ------------------------------------------------------------------------ > > rcupdate.h: > > #define rcu_read_lock_bh() __rcu_read_lock_bh() > #define rcu_read_unlock_bh() __rcu_read_unlock_bh() > > rcupreempt.h: > > #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > > ------------------------------------------------------------------------ > > And then this is the "new way": > > ------------------------------------------------------------------------ > > rcupdate.h: > > #ifdef HAVE_SPECIAL_RCU_BH > #define rcu_read_lock_bh() __rcu_read_lock_bh() > #define rcu_read_unlock_bh() __rcu_read_unlock_bh() > #else > #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > #endif /* HAVE_SPECIAL_RCU_BH */ > > rcupreempt.h: > > #define HAVE_SPECIAL_RCU_BH > > ------------------------------------------------------------------------ > > If we had ten different RCU implementations, then the "new way" would save > a little bit of code. But the "old way" is a bit easier to figure out. > > So I am in favor of getting rid of the ugly macro, and also in favor > of fixing the kerneldoc, but opposed to the HAVE_SPECIAL_RCU_BH and > HAVE_SPECIAL_RCU_SCHED changes. I apprehended and agree with you. Thanx. Lai. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-11 0:55 ` Lai Jiangshan @ 2008-11-11 1:03 ` Paul E. McKenney 2008-11-13 2:48 ` Lai Jiangshan 0 siblings, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-11 1:03 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Tue, Nov 11, 2008 at 08:55:00AM +0800, Lai Jiangshan wrote: > Paul E. McKenney wrote: > > On Mon, Nov 10, 2008 at 11:22:15AM +0800, Lai Jiangshan wrote: > >> Paul E. McKenney wrote: > >>> On Thu, Nov 06, 2008 at 02:47:44PM +0800, Lai Jiangshan wrote: > >>>> this fix remove ugly macro, and increase readability for rcupdate codes > >>>> > >>>> changed from v1: > >>>> use HAVE_SPECIAL_RCU_BH/SCHED instead of define duplicate version of > >>>> synchronize_sched(). > >>> Hello, Jiangshan! > >>> > >>> I very much like getting rid of the ugly macro. I of course like the > >>> kernel-doc fixes. ;-) > >>> > >>> I am not yet convinced of the HAVE_SPECIAL_RCU_BH and > >>> HAVE_SPECIAL_RCU_SCHED pieces. It is not clear to me that this approach > >>> is simpler than the current approach of simply providing the appropriate > >>> definitions for the symbols in the implementation-specific rcuxxx.h > >>> file. > >>> > >>> Am I missing something? > >>> > >>> Thanx, Paul > >>> > >> I think: > >> > >> RCU_BH is not required, we can used RCU instead. so HAVE_SPECIAL_RCU_BH > >> will help for implementation which has not RCU_BH. > >> > >> HAVE_SPECIAL_RCU_SCHED is a little different, RCU and RCU_SCHED are both > >> required for the kernel. But I think, in an implementation, > >> if rcu_read_lock_sched() implies rcu_read_lock(), we may not need implement > >> RCU_SCHED too(sometimes we may implement RCU_SCHED for performance). > >> so HAVE_SPECIAL_RCU_SCHED will help. > > > > If I understand correctly, this is the "old way": > > > > ------------------------------------------------------------------------ > > > > rcupdate.h: > > > > #define rcu_read_lock_bh() __rcu_read_lock_bh() > > #define rcu_read_unlock_bh() __rcu_read_unlock_bh() > > > > rcupreempt.h: > > > > #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > > #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > > > > ------------------------------------------------------------------------ > > > > And then this is the "new way": > > > > ------------------------------------------------------------------------ > > > > rcupdate.h: > > > > #ifdef HAVE_SPECIAL_RCU_BH > > #define rcu_read_lock_bh() __rcu_read_lock_bh() > > #define rcu_read_unlock_bh() __rcu_read_unlock_bh() > > #else > > #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } > > #define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } > > #endif /* HAVE_SPECIAL_RCU_BH */ > > > > rcupreempt.h: > > > > #define HAVE_SPECIAL_RCU_BH > > > > ------------------------------------------------------------------------ > > > > If we had ten different RCU implementations, then the "new way" would save > > a little bit of code. But the "old way" is a bit easier to figure out. > > > > So I am in favor of getting rid of the ugly macro, and also in favor > > of fixing the kerneldoc, but opposed to the HAVE_SPECIAL_RCU_BH and > > HAVE_SPECIAL_RCU_SCHED changes. > > I apprehended and agree with you. Thanx. Sounds good -- and thank you for your much-needed efforts to improve the RCU implementation! Thanx, Paul ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-11 1:03 ` Paul E. McKenney @ 2008-11-13 2:48 ` Lai Jiangshan 2008-11-13 17:31 ` Paul E. McKenney 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-13 2:48 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List Hi, Paul, Could you add a RCU document about unloadable modules for kernel? Thanx, Lai. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-13 2:48 ` Lai Jiangshan @ 2008-11-13 17:31 ` Paul E. McKenney 2008-11-14 1:03 ` Lai Jiangshan 0 siblings, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-13 17:31 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List, corbet On Thu, Nov 13, 2008 at 10:48:33AM +0800, Lai Jiangshan wrote: > > Hi, Paul, > > Could you add a RCU document about unloadable modules for kernel? You thinking in terms of an ASCII version of http://lwn.net/Articles/217484/? If so, please see attached patch and let me know what you think. Being too lazy to convert the cartoon to ASCII graphics, I simply left a URL to the .jpg on the LWN website. Thus we need an ack/nack from Jon Corbet (CCed). Of course, an alternative is to simply include the URL of the original LWN article in 00-INDEX. Thoughts? Thanx, Paul Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> --- 00-INDEX | 2 rcubarrier.txt | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 311 insertions(+) diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/00-INDEX linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX --- linux-2.6.27/Documentation/RCU/00-INDEX 2008-10-09 15:13:53.000000000 -0700 +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX 2008-11-13 08:46:17.000000000 -0800 @@ -12,6 +12,8 @@ rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt - RCU Concepts +rcubarrier.txt + - Unloading modules that use RCU callbacks RTFP.txt - List of RCU papers (bibliography) going back to 1980. torture.txt diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/rcubarrier.txt linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt --- linux-2.6.27/Documentation/RCU/rcubarrier.txt 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt 2008-11-13 09:26:16.000000000 -0800 @@ -0,0 +1,309 @@ +RCU and Unloadable Modules + +[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/] + +RCU (read-copy update) is a synchronization mechanism that can be thought +of as a replacement for read-writer locking (among other things), but with +very low-overhead readers that are immune to deadlock, priority inversion, +and unbounded latency. RCU read-side critical sections are delimited +by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT +kernels, generate no code whatsoever. + +This means that RCU writers are unaware of the presence of concurrent +readers, so that RCU updates to shared data must be undertaken quite +carefully, leaving an old version of the data structure in place until all +pre-existing readers have finished. These old versions are needed because +such readers might hold a reference to them. RCU updates can therefore be +rather expensive, and RCU is thus best suited for read-mostly situations. + +How can an RCU writer possibly determine when all readers are finished, +given that readers might well leave absolutely no trace of their +presence? There is a synchronize_rcu() primitive that blocks until all +pre-existing readers have completed. An updater wishing to delete an +element p from a linked list might do the following, while holding an +appropriate lock, of course: + + list_del_rcu(p); + synchronize_rcu(); + kfree(p); + +But the above code cannot be used in IRQ context -- the call_rcu() +primitive must be used instead. This primitive takes a pointer to an +rcu_head struct placed within the RCU-protected data structure and +another pointer to a function that may be invoked later to free that +structure. Code to delete an element p from the linked list from IRQ +context might then be as follows: + + list_del_rcu(p); + call_rcu(&p->rcu, p_callback); + +Since call_rcu() never blocks, this code can safely be used from within +IRQ context. The function p_callback() might be defined as follows: + + static void p_callback(struct rcu_head *rp) + { + struct pstruct *p = container_of(rp, struct pstruct, rcu); + + kfree(p); + } + + +Unloading Modules That Use call_rcu() + +But what if p_callback is defined in an unloadable module? + +If we unload the module while some RCU callbacks are pending, +the CPUs executing these callbacks are going to be severely +disappointed when they are later invoked, as fancifully depicted at +http://lwn.net/images/ns/kernel/rcu-drop.jpg. + +We could try placing a synchronize_rcu() in the module-exit code path, +but this is not sufficient. Although synchronize_rcu() does wait for a +grace period to elapse, it does not wait for the callbacks to complete. + +One might be tempted to try several back-to-back synchronize_rcu() +calls, but this is still not guaranteed to work. If there is a very +heavy RCU-callback load, then some of the callbacks might be deferred +in order to allow other processing to proceed. Such deferral is required +in realtime kernels in order to avoid excessive scheduling latencies. + + +rcu_barrier() + +We instead need the rcu_barrier() primitive. This primitive is similar +to synchronize_rcu(), but instead of waiting solely for a grace +period to elapse, it also waits for all outstanding RCU callbacks to +complete. Pseudo-code using rcu_barrier() is as follows: + + 1. Prevent any new RCU callbacks from being posted. + 2. Execute rcu_barrier(). + 3. Allow the module to be unloaded. + +Quick Quiz #1: Why is there no srcu_barrier()? + +Quick Quiz #2: Why is there no rcu_barrier_bh()? + +The rcutorture module makes use of rcu_barrier in its exit function +as follows: + + 1 static void + 2 rcu_torture_cleanup(void) + 3 { + 4 int i; + 5 + 6 fullstop = 1; + 7 if (shuffler_task != NULL) { + 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + 9 kthread_stop(shuffler_task); +10 } +11 shuffler_task = NULL; +12 +13 if (writer_task != NULL) { +14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); +15 kthread_stop(writer_task); +16 } +17 writer_task = NULL; +18 +19 if (reader_tasks != NULL) { +20 for (i = 0; i < nrealreaders; i++) { +21 if (reader_tasks[i] != NULL) { +22 VERBOSE_PRINTK_STRING( +23 "Stopping rcu_torture_reader task"); +24 kthread_stop(reader_tasks[i]); +25 } +26 reader_tasks[i] = NULL; +27 } +28 kfree(reader_tasks); +29 reader_tasks = NULL; +30 } +31 rcu_torture_current = NULL; +32 +33 if (fakewriter_tasks != NULL) { +34 for (i = 0; i < nfakewriters; i++) { +35 if (fakewriter_tasks[i] != NULL) { +36 VERBOSE_PRINTK_STRING( +37 "Stopping rcu_torture_fakewriter task"); +38 kthread_stop(fakewriter_tasks[i]); +39 } +40 fakewriter_tasks[i] = NULL; +41 } +42 kfree(fakewriter_tasks); +43 fakewriter_tasks = NULL; +44 } +45 +46 if (stats_task != NULL) { +47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); +48 kthread_stop(stats_task); +49 } +50 stats_task = NULL; +51 +52 /* Wait for all RCU callbacks to fire. */ +53 rcu_barrier(); +54 +55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ +56 +57 if (cur_ops->cleanup != NULL) +58 cur_ops->cleanup(); +59 if (atomic_read(&n_rcu_torture_error)) +60 rcu_torture_print_module_parms("End of test: FAILURE"); +61 else +62 rcu_torture_print_module_parms("End of test: SUCCESS"); +63 } + +Line 6 sets a global variable that prevents any RCU callbacks from +re-posting themselves. This will not be necessary in most cases, since +RCU callbacks rarely include calls to call_rcu(). However, the rcutorture +module is an exception to this rule, and therefore needs to set this +global variable. + +Lines 7-50 stop all the kernel tasks associated with the rcutorture +module. Therefore, once execution reaches line 53, no more rcutorture +RCU callbacks will be posted. The rcu_barrier() call on line 53 waits +for any pre-existing callbacks to complete. + +Then lines 55-62 print status and do operation-specific cleanup, and +then return, permitting the module-unload operation to be completed. + +Quick Quiz #3: Is there any other situation where rcu_barrier() might + be required? + +Your module might have additional complications. For example, if your +module invokes call_rcu() from timers, you will need to first cancel all +the timers, and only then invoke rcu_barrier() to wait for any remaining +RCU callbacks to complete. + + +Implementing rcu_barrier() + +Dipankar Sarma's implementation of rcu_barrier() makes use of the fact +that RCU callbacks are never reordered once queued on one of the per-CPU +queues. His implementation queues an RCU callback on each of the per-CPU +callback queues, and then waits until they have all started executing, at +which point, all earlier RCU callbacks are guaranteed to have completed. + +The code for rcu_barrier() is as follows: + + 1 void rcu_barrier(void) + 2 { + 3 BUG_ON(in_interrupt()); + 4 /* Take cpucontrol mutex to protect against CPU hotplug */ + 5 mutex_lock(&rcu_barrier_mutex); + 6 init_completion(&rcu_barrier_completion); + 7 atomic_set(&rcu_barrier_cpu_count, 0); + 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1); + 9 wait_for_completion(&rcu_barrier_completion); +10 mutex_unlock(&rcu_barrier_mutex); +11 } + +Line 3 verifies that the caller is in process context, and lines 5 and 10 +use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the +global completion and counters at a time, which are initialized on lines +6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is +shown below. Note that the final "1" in on_each_cpu()'s argument list +ensures that all the calls to rcu_barrier_func() will have completed +before on_each_cpu() returns. Line 9 then waits for the completion. + +The rcu_barrier_func() runs on each CPU, where it invokes call_rcu() +to post an RCU callback, as follows: + + 1 static void rcu_barrier_func(void *notused) + 2 { + 3 int cpu = smp_processor_id(); + 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + 5 struct rcu_head *head; + 6 + 7 head = &rdp->barrier; + 8 atomic_inc(&rcu_barrier_cpu_count); + 9 call_rcu(head, rcu_barrier_callback); +10 } + +Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure, +which contains the struct rcu_head that needed for the later call to +call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line +8 increments a global counter. This counter will later be decremented +by the callback. Line 9 then registers the rcu_barrier_callback() on +the current CPU's queue. + +The rcu_barrier_callback() function simply atomically decrements the +rcu_barrier_cpu_count variable and finalizes the completion when it +reaches zero, as follows: + + 1 static void rcu_barrier_callback(struct rcu_head *notused) + 2 { + 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + 4 complete(&rcu_barrier_completion); + 5 } + +Quick Quiz #4: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? Couldn't this result in + rcu_barrier() returning prematurely? + + +rcu_barrier() Summary + +The rcu_barrier() primitive has seen relatively little use, since most +code using RCU is in the core kernel rather than in modules. However, if +you are using RCU from an unloadable module, you need to use rcu_barrier() +so that your module may be safely unloaded. + + +Answers to Quick Quizzes + +Quick Quiz #1: Why is there no srcu_barrier()? + +Answer: Since there is no call_srcu(), there can be no outstanding SRCU + callbacks. Therefore, there is no need to wait for them. + +Quick Quiz #2: Why is there no rcu_barrier_bh()? + +Answer: Because no one has needed it yet. As soon as someone needs to + use call_rcu_bh() from within an unloadable module, they will + need an rcu_barrier_bh(). + +Quick Quiz #3: Is there any other situation where rcu_barrier() might + be required? + +Answer: Interestingly enough, rcu_barrier() was not originally + implemented for module unloading. Nikita Danilov was using + RCU in a filesystem, which resulted in a similar situation at + filesystem-unmount time. Dipankar Sarma coded up rcu_barrier() + in response, so that Nikita could invoke it during the + filesystem-unmount process. + + Much later, yours truly hit the RCU module-unload problem when + implementing rcutorture, and found that rcu_barrier() solves + this problem as well. + +Quick Quiz #4: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? Couldn't this result in + rcu_barrier() returning prematurely? + +Answer: This cannot happen. The reason is that on_each_cpu() has its last + argument, the wait flag, set to "1". This flag is passed through + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent + a grace period from completing on non-CONFIG_PREEMPT kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is + of no use in CONFIG_PREEMPT kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to + rcu_barrier_func(). This prevents the local CPU from context + switching, again preventing grace periods from completing. This + means that all CPUs have executed rcu_barrier_func() before + the first rcu_barrier_callback() can possibly execute, in turn + preventing rcu_barrier_cpu_count from prematurely reaching zero. + + Currently, -rt implementations of RCU keep but a single global + queue for RCU callbacks, and thus do not suffer from this + problem. However, when the -rt RCU eventually does have per-CPU + callback queues, things will have to change. One simple change + is to add an rcu_read_lock() before line 8 of rcu_barrier() + and an rcu_read_unlock() after line 8 of this same function. If + you can think of a better change, please let me know! ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-13 17:31 ` Paul E. McKenney @ 2008-11-14 1:03 ` Lai Jiangshan 2008-11-14 2:11 ` Paul E. McKenney 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-14 1:03 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List, corbet Paul E. McKenney wrote: > On Thu, Nov 13, 2008 at 10:48:33AM +0800, Lai Jiangshan wrote: >> Hi, Paul, >> >> Could you add a RCU document about unloadable modules for kernel? > > You thinking in terms of an ASCII version of > http://lwn.net/Articles/217484/? > > If so, please see attached patch and let me know what you think. > Being too lazy to convert the cartoon to ASCII graphics, I simply > left a URL to the .jpg on the LWN website. Thus we need an ack/nack > from Jon Corbet (CCed). Hi, Paul Thank you. it's a very good document. I found several modules which need rcu_barrier(). So I'm going to do some cleanup for them. A document for rcu_barrier() will help these cleanup patches be accepted easily by maintainers. Lai. > > Of course, an alternative is to simply include the URL of the original > LWN article in 00-INDEX. Thoughts? > > Thanx, Paul > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > --- > +The code for rcu_barrier() is as follows: > + > + 1 void rcu_barrier(void) > + 2 { > + 3 BUG_ON(in_interrupt()); > + 4 /* Take cpucontrol mutex to protect against CPU hotplug */ > + 5 mutex_lock(&rcu_barrier_mutex); > + 6 init_completion(&rcu_barrier_completion); > + 7 atomic_set(&rcu_barrier_cpu_count, 0); > + 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1); > + 9 wait_for_completion(&rcu_barrier_completion); > +10 mutex_unlock(&rcu_barrier_mutex); > +11 } > + this is a little old. > + > +Quick Quiz #2: Why is there no rcu_barrier_bh()? > + > +Answer: Because no one has needed it yet. As soon as someone needs to > + use call_rcu_bh() from within an unloadable module, they will > + need an rcu_barrier_bh(). > + add here. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-14 1:03 ` Lai Jiangshan @ 2008-11-14 2:11 ` Paul E. McKenney 2008-11-14 7:39 ` Lai Jiangshan 0 siblings, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-14 2:11 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List, corbet On Fri, Nov 14, 2008 at 09:03:20AM +0800, Lai Jiangshan wrote: > Paul E. McKenney wrote: > > On Thu, Nov 13, 2008 at 10:48:33AM +0800, Lai Jiangshan wrote: > >> Hi, Paul, > >> > >> Could you add a RCU document about unloadable modules for kernel? > > > > You thinking in terms of an ASCII version of > > http://lwn.net/Articles/217484/? > > > > If so, please see attached patch and let me know what you think. > > Being too lazy to convert the cartoon to ASCII graphics, I simply > > left a URL to the .jpg on the LWN website. Thus we need an ack/nack > > from Jon Corbet (CCed). > > Hi, Paul > > Thank you. it's a very good document. Updated version attached. > I found several modules which need rcu_barrier(). So I'm going to > do some cleanup for them. A document for rcu_barrier() will help > these cleanup patches be accepted easily by maintainers. Sounds very good!!! > Lai. > > > > > Of course, an alternative is to simply include the URL of the original > > LWN article in 00-INDEX. Thoughts? > > > > Thanx, Paul > > > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > --- > > > +The code for rcu_barrier() is as follows: > > + > > + 1 void rcu_barrier(void) > > + 2 { > > + 3 BUG_ON(in_interrupt()); > > + 4 /* Take cpucontrol mutex to protect against CPU hotplug */ > > + 5 mutex_lock(&rcu_barrier_mutex); > > + 6 init_completion(&rcu_barrier_completion); > > + 7 atomic_set(&rcu_barrier_cpu_count, 0); > > + 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1); > > + 9 wait_for_completion(&rcu_barrier_completion); > > +10 mutex_unlock(&rcu_barrier_mutex); > > +11 } > > + > > this is a little old. Indeed!!! Good catch! I left this code, but added words saying that it was the original and that it has since been rewritten to add support for rcu_barrier_bh() and rcu_barrier_sched(). > > + > > +Quick Quiz #2: Why is there no rcu_barrier_bh()? > > + > > +Answer: Because no one has needed it yet. As soon as someone needs to > > + use call_rcu_bh() from within an unloadable module, they will > > + need an rcu_barrier_bh(). > > + > > add here. Good point, I deleted this Quick Quiz. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> --- diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/00-INDEX linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX --- linux-2.6.27/Documentation/RCU/00-INDEX 2008-10-09 15:13:53.000000000 -0700 +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX 2008-11-13 08:46:17.000000000 -0800 @@ -12,6 +12,8 @@ rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt - RCU Concepts +rcubarrier.txt + - Unloading modules that use RCU callbacks RTFP.txt - List of RCU papers (bibliography) going back to 1980. torture.txt diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/rcubarrier.txt linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt --- linux-2.6.27/Documentation/RCU/rcubarrier.txt 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt 2008-11-13 18:08:35.000000000 -0800 @@ -0,0 +1,304 @@ +RCU and Unloadable Modules + +[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/] + +RCU (read-copy update) is a synchronization mechanism that can be thought +of as a replacement for read-writer locking (among other things), but with +very low-overhead readers that are immune to deadlock, priority inversion, +and unbounded latency. RCU read-side critical sections are delimited +by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT +kernels, generate no code whatsoever. + +This means that RCU writers are unaware of the presence of concurrent +readers, so that RCU updates to shared data must be undertaken quite +carefully, leaving an old version of the data structure in place until all +pre-existing readers have finished. These old versions are needed because +such readers might hold a reference to them. RCU updates can therefore be +rather expensive, and RCU is thus best suited for read-mostly situations. + +How can an RCU writer possibly determine when all readers are finished, +given that readers might well leave absolutely no trace of their +presence? There is a synchronize_rcu() primitive that blocks until all +pre-existing readers have completed. An updater wishing to delete an +element p from a linked list might do the following, while holding an +appropriate lock, of course: + + list_del_rcu(p); + synchronize_rcu(); + kfree(p); + +But the above code cannot be used in IRQ context -- the call_rcu() +primitive must be used instead. This primitive takes a pointer to an +rcu_head struct placed within the RCU-protected data structure and +another pointer to a function that may be invoked later to free that +structure. Code to delete an element p from the linked list from IRQ +context might then be as follows: + + list_del_rcu(p); + call_rcu(&p->rcu, p_callback); + +Since call_rcu() never blocks, this code can safely be used from within +IRQ context. The function p_callback() might be defined as follows: + + static void p_callback(struct rcu_head *rp) + { + struct pstruct *p = container_of(rp, struct pstruct, rcu); + + kfree(p); + } + + +Unloading Modules That Use call_rcu() + +But what if p_callback is defined in an unloadable module? + +If we unload the module while some RCU callbacks are pending, +the CPUs executing these callbacks are going to be severely +disappointed when they are later invoked, as fancifully depicted at +http://lwn.net/images/ns/kernel/rcu-drop.jpg. + +We could try placing a synchronize_rcu() in the module-exit code path, +but this is not sufficient. Although synchronize_rcu() does wait for a +grace period to elapse, it does not wait for the callbacks to complete. + +One might be tempted to try several back-to-back synchronize_rcu() +calls, but this is still not guaranteed to work. If there is a very +heavy RCU-callback load, then some of the callbacks might be deferred +in order to allow other processing to proceed. Such deferral is required +in realtime kernels in order to avoid excessive scheduling latencies. + + +rcu_barrier() + +We instead need the rcu_barrier() primitive. This primitive is similar +to synchronize_rcu(), but instead of waiting solely for a grace +period to elapse, it also waits for all outstanding RCU callbacks to +complete. Pseudo-code using rcu_barrier() is as follows: + + 1. Prevent any new RCU callbacks from being posted. + 2. Execute rcu_barrier(). + 3. Allow the module to be unloaded. + +Quick Quiz #1: Why is there no srcu_barrier()? + +The rcutorture module makes use of rcu_barrier in its exit function +as follows: + + 1 static void + 2 rcu_torture_cleanup(void) + 3 { + 4 int i; + 5 + 6 fullstop = 1; + 7 if (shuffler_task != NULL) { + 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + 9 kthread_stop(shuffler_task); +10 } +11 shuffler_task = NULL; +12 +13 if (writer_task != NULL) { +14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); +15 kthread_stop(writer_task); +16 } +17 writer_task = NULL; +18 +19 if (reader_tasks != NULL) { +20 for (i = 0; i < nrealreaders; i++) { +21 if (reader_tasks[i] != NULL) { +22 VERBOSE_PRINTK_STRING( +23 "Stopping rcu_torture_reader task"); +24 kthread_stop(reader_tasks[i]); +25 } +26 reader_tasks[i] = NULL; +27 } +28 kfree(reader_tasks); +29 reader_tasks = NULL; +30 } +31 rcu_torture_current = NULL; +32 +33 if (fakewriter_tasks != NULL) { +34 for (i = 0; i < nfakewriters; i++) { +35 if (fakewriter_tasks[i] != NULL) { +36 VERBOSE_PRINTK_STRING( +37 "Stopping rcu_torture_fakewriter task"); +38 kthread_stop(fakewriter_tasks[i]); +39 } +40 fakewriter_tasks[i] = NULL; +41 } +42 kfree(fakewriter_tasks); +43 fakewriter_tasks = NULL; +44 } +45 +46 if (stats_task != NULL) { +47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); +48 kthread_stop(stats_task); +49 } +50 stats_task = NULL; +51 +52 /* Wait for all RCU callbacks to fire. */ +53 rcu_barrier(); +54 +55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ +56 +57 if (cur_ops->cleanup != NULL) +58 cur_ops->cleanup(); +59 if (atomic_read(&n_rcu_torture_error)) +60 rcu_torture_print_module_parms("End of test: FAILURE"); +61 else +62 rcu_torture_print_module_parms("End of test: SUCCESS"); +63 } + +Line 6 sets a global variable that prevents any RCU callbacks from +re-posting themselves. This will not be necessary in most cases, since +RCU callbacks rarely include calls to call_rcu(). However, the rcutorture +module is an exception to this rule, and therefore needs to set this +global variable. + +Lines 7-50 stop all the kernel tasks associated with the rcutorture +module. Therefore, once execution reaches line 53, no more rcutorture +RCU callbacks will be posted. The rcu_barrier() call on line 53 waits +for any pre-existing callbacks to complete. + +Then lines 55-62 print status and do operation-specific cleanup, and +then return, permitting the module-unload operation to be completed. + +Quick Quiz #2: Is there any other situation where rcu_barrier() might + be required? + +Your module might have additional complications. For example, if your +module invokes call_rcu() from timers, you will need to first cancel all +the timers, and only then invoke rcu_barrier() to wait for any remaining +RCU callbacks to complete. + + +Implementing rcu_barrier() + +Dipankar Sarma's implementation of rcu_barrier() makes use of the fact +that RCU callbacks are never reordered once queued on one of the per-CPU +queues. His implementation queues an RCU callback on each of the per-CPU +callback queues, and then waits until they have all started executing, at +which point, all earlier RCU callbacks are guaranteed to have completed. + +The original code for rcu_barrier() was as follows: + + 1 void rcu_barrier(void) + 2 { + 3 BUG_ON(in_interrupt()); + 4 /* Take cpucontrol mutex to protect against CPU hotplug */ + 5 mutex_lock(&rcu_barrier_mutex); + 6 init_completion(&rcu_barrier_completion); + 7 atomic_set(&rcu_barrier_cpu_count, 0); + 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1); + 9 wait_for_completion(&rcu_barrier_completion); +10 mutex_unlock(&rcu_barrier_mutex); +11 } + +Line 3 verifies that the caller is in process context, and lines 5 and 10 +use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the +global completion and counters at a time, which are initialized on lines +6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is +shown below. Note that the final "1" in on_each_cpu()'s argument list +ensures that all the calls to rcu_barrier_func() will have completed +before on_each_cpu() returns. Line 9 then waits for the completion. + +This code was rewritten in 2008 to support rcu_barrier_bh() and +rcu_barrier_sched() in addition to the original rcu_barrier(). + +The rcu_barrier_func() runs on each CPU, where it invokes call_rcu() +to post an RCU callback, as follows: + + 1 static void rcu_barrier_func(void *notused) + 2 { + 3 int cpu = smp_processor_id(); + 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + 5 struct rcu_head *head; + 6 + 7 head = &rdp->barrier; + 8 atomic_inc(&rcu_barrier_cpu_count); + 9 call_rcu(head, rcu_barrier_callback); +10 } + +Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure, +which contains the struct rcu_head that needed for the later call to +call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line +8 increments a global counter. This counter will later be decremented +by the callback. Line 9 then registers the rcu_barrier_callback() on +the current CPU's queue. + +The rcu_barrier_callback() function simply atomically decrements the +rcu_barrier_cpu_count variable and finalizes the completion when it +reaches zero, as follows: + + 1 static void rcu_barrier_callback(struct rcu_head *notused) + 2 { + 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + 4 complete(&rcu_barrier_completion); + 5 } + +Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? Couldn't this result in + rcu_barrier() returning prematurely? + + +rcu_barrier() Summary + +The rcu_barrier() primitive has seen relatively little use, since most +code using RCU is in the core kernel rather than in modules. However, if +you are using RCU from an unloadable module, you need to use rcu_barrier() +so that your module may be safely unloaded. + + +Answers to Quick Quizzes + +Quick Quiz #1: Why is there no srcu_barrier()? + +Answer: Since there is no call_srcu(), there can be no outstanding SRCU + callbacks. Therefore, there is no need to wait for them. + +Quick Quiz #2: Is there any other situation where rcu_barrier() might + be required? + +Answer: Interestingly enough, rcu_barrier() was not originally + implemented for module unloading. Nikita Danilov was using + RCU in a filesystem, which resulted in a similar situation at + filesystem-unmount time. Dipankar Sarma coded up rcu_barrier() + in response, so that Nikita could invoke it during the + filesystem-unmount process. + + Much later, yours truly hit the RCU module-unload problem when + implementing rcutorture, and found that rcu_barrier() solves + this problem as well. + +Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes + immediately (thus incrementing rcu_barrier_cpu_count to the + value one), but the other CPU's rcu_barrier_func() invocations + are delayed for a full grace period? Couldn't this result in + rcu_barrier() returning prematurely? + +Answer: This cannot happen. The reason is that on_each_cpu() has its last + argument, the wait flag, set to "1". This flag is passed through + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent + a grace period from completing on non-CONFIG_PREEMPT kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is + of no use in CONFIG_PREEMPT kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to + rcu_barrier_func(). This prevents the local CPU from context + switching, again preventing grace periods from completing. This + means that all CPUs have executed rcu_barrier_func() before + the first rcu_barrier_callback() can possibly execute, in turn + preventing rcu_barrier_cpu_count from prematurely reaching zero. + + Currently, -rt implementations of RCU keep but a single global + queue for RCU callbacks, and thus do not suffer from this + problem. However, when the -rt RCU eventually does have per-CPU + callback queues, things will have to change. One simple change + is to add an rcu_read_lock() before line 8 of rcu_barrier() + and an rcu_read_unlock() after line 8 of this same function. If + you can think of a better change, please let me know! ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-14 2:11 ` Paul E. McKenney @ 2008-11-14 7:39 ` Lai Jiangshan 2008-11-14 19:25 ` Jonathan Corbet 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-14 7:39 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List, corbet Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com> Paul E. McKenney wrote: [...] > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > --- > > diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/00-INDEX linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX > --- linux-2.6.27/Documentation/RCU/00-INDEX 2008-10-09 15:13:53.000000000 -0700 > +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/00-INDEX 2008-11-13 08:46:17.000000000 -0800 > @@ -12,6 +12,8 @@ rcuref.txt > - Reference-count design for elements of lists/arrays protected by RCU > rcu.txt > - RCU Concepts > +rcubarrier.txt > + - Unloading modules that use RCU callbacks > RTFP.txt > - List of RCU papers (bibliography) going back to 1980. > torture.txt > diff -urpNa -X dontdiff linux-2.6.27/Documentation/RCU/rcubarrier.txt linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt > --- linux-2.6.27/Documentation/RCU/rcubarrier.txt 1969-12-31 16:00:00.000000000 -0800 > +++ linux-2.6.27-rcu_barrierdoc/Documentation/RCU/rcubarrier.txt 2008-11-13 18:08:35.000000000 -0800 [...] ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-14 7:39 ` Lai Jiangshan @ 2008-11-14 19:25 ` Jonathan Corbet 2008-11-15 20:39 ` Paul E. McKenney 0 siblings, 1 reply; 16+ messages in thread From: Jonathan Corbet @ 2008-11-14 19:25 UTC (permalink / raw) To: Lai Jiangshan Cc: paulmck, Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Fri, 14 Nov 2008 15:39:02 +0800 Lai Jiangshan <laijs@cn.fujitsu.com> wrote: > Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com> > > Paul E. McKenney wrote: All seems good. To answer Paul's original question: > If so, please see attached patch and let me know what you think. > Being too lazy to convert the cartoon to ASCII graphics, I simply > left a URL to the .jpg on the LWN website. Thus we need an ack/nack > from Jon Corbet (CCed). Of course I have no problem with the LWN links; they will remain stable. If nobody objects, I'll drop this into my documentation tree. Gotta have *something* there, after all, or it might start to feel neglected... jon ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-14 19:25 ` Jonathan Corbet @ 2008-11-15 20:39 ` Paul E. McKenney 2008-11-17 12:57 ` Lai Jiangshan 0 siblings, 1 reply; 16+ messages in thread From: Paul E. McKenney @ 2008-11-15 20:39 UTC (permalink / raw) To: Jonathan Corbet Cc: Lai Jiangshan, Ingo Molnar, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Fri, Nov 14, 2008 at 12:25:43PM -0700, Jonathan Corbet wrote: > On Fri, 14 Nov 2008 15:39:02 +0800 > Lai Jiangshan <laijs@cn.fujitsu.com> wrote: > > > Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com> > > > > Paul E. McKenney wrote: > > All seems good. To answer Paul's original question: > > > If so, please see attached patch and let me know what you think. > > Being too lazy to convert the cartoon to ASCII graphics, I simply > > left a URL to the .jpg on the LWN website. Thus we need an ack/nack > > from Jon Corbet (CCed). > > Of course I have no problem with the LWN links; they will remain stable. > > If nobody objects, I'll drop this into my documentation tree. Gotta > have *something* there, after all, or it might start to feel > neglected... Works for me! Thanx, Paul ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-15 20:39 ` Paul E. McKenney @ 2008-11-17 12:57 ` Lai Jiangshan 2008-11-17 21:28 ` Jonathan Corbet 0 siblings, 1 reply; 16+ messages in thread From: Lai Jiangshan @ 2008-11-17 12:57 UTC (permalink / raw) To: Ingo Molnar Cc: paulmck, Jonathan Corbet, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List Hi, Ingo, Is there any problem for this document added to kernel? Thanx, Lai. Paul E. McKenney wrote: > On Fri, Nov 14, 2008 at 12:25:43PM -0700, Jonathan Corbet wrote: >> On Fri, 14 Nov 2008 15:39:02 +0800 >> Lai Jiangshan <laijs@cn.fujitsu.com> wrote: >> >>> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com> >>> >>> Paul E. McKenney wrote: >> All seems good. To answer Paul's original question: >> >>> If so, please see attached patch and let me know what you think. >>> Being too lazy to convert the cartoon to ASCII graphics, I simply >>> left a URL to the .jpg on the LWN website. Thus we need an ack/nack >>> from Jon Corbet (CCed). >> Of course I have no problem with the LWN links; they will remain stable. >> >> If nobody objects, I'll drop this into my documentation tree. Gotta >> have *something* there, after all, or it might start to feel >> neglected... > > Works for me! > > Thanx, Paul > > > ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 2008-11-17 12:57 ` Lai Jiangshan @ 2008-11-17 21:28 ` Jonathan Corbet 0 siblings, 0 replies; 16+ messages in thread From: Jonathan Corbet @ 2008-11-17 21:28 UTC (permalink / raw) To: Lai Jiangshan Cc: Ingo Molnar, paulmck, Peter Zijlstra, Andrew Morton, Linux Kernel Mailing List On Mon, 17 Nov 2008 20:57:40 +0800 Lai Jiangshan <laijs@cn.fujitsu.com> wrote: > Is there any problem for this document added to kernel? No problem; I'll get it queued up for 2.6.29 unless somebody else gets it there first. jon ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2008-11-17 21:29 UTC | newest] Thread overview: 16+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-11-06 6:47 [PATCH] rcupdate: move synchronize_sched() back to rcupdate.c V2 Lai Jiangshan 2008-11-06 6:57 ` Ingo Molnar 2008-11-09 0:51 ` Paul E. McKenney 2008-11-10 3:22 ` Lai Jiangshan 2008-11-10 18:45 ` Paul E. McKenney 2008-11-11 0:55 ` Lai Jiangshan 2008-11-11 1:03 ` Paul E. McKenney 2008-11-13 2:48 ` Lai Jiangshan 2008-11-13 17:31 ` Paul E. McKenney 2008-11-14 1:03 ` Lai Jiangshan 2008-11-14 2:11 ` Paul E. McKenney 2008-11-14 7:39 ` Lai Jiangshan 2008-11-14 19:25 ` Jonathan Corbet 2008-11-15 20:39 ` Paul E. McKenney 2008-11-17 12:57 ` Lai Jiangshan 2008-11-17 21:28 ` Jonathan Corbet
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox