From: Lai Jiangshan <laijs@cn.fujitsu.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>,
Paul McKenney <paulmck@linux.vnet.ibm.com>,
Mel Gorman <mgorman@suse.de>, Rik van Riel <riel@redhat.com>,
Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
Ingo Molnar <mingo@kernel.org>,
Andrea Arcangeli <aarcange@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Thomas Gleixner <tglx@linutronix.de>,
Steven Rostedt <rostedt@goodmis.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/6] rcu: Create rcu_sync infrastructure
Date: Thu, 17 Oct 2013 10:56:31 +0800 [thread overview]
Message-ID: <525F51DF.4080809@cn.fujitsu.com> (raw)
In-Reply-To: <20131008103830.180541879@infradead.org>
On 10/08/2013 06:25 PM, Peter Zijlstra wrote:
> From: Oleg Nesterov <oleg@redhat.com>
>
> It is functionally equivalent to
>
> struct rcu_sync_struct {
> atomic_t counter;
> };
>
> static inline bool rcu_sync_is_idle(struct rcu_sync_struct *xxx)
> {
> return atomic_read(&xxx->counter) == 0;
> }
>
> static inline void rcu_sync_enter(struct rcu_sync_struct *xxx)
> {
> atomic_inc(&xxx->counter);
> synchronize_sched();
> }
>
> static inline void rcu_sync_enter(struct rcu_sync_struct *xxx)
> {
> synchronize_sched();
> atomic_dec(&xxx->counter);
> }
>
> except: it records the state and synchronize_sched() is only called by
> rcu_sync_enter() and only if necessary.
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> Reviewed-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
> Signed-off-by: Peter Zijlstra <peterz@infradead.org>
> Link: http://lkml.kernel.org/r/20130929183634.GA15563@redhat.com
> ---
> include/linux/rcusync.h | 64 ++++++++++++++++++++++++++++
> kernel/Makefile | 3 -
> kernel/rcusync.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 174 insertions(+), 1 deletion(-)
>
> --- /dev/null
> +++ b/include/linux/rcusync.h
> @@ -0,0 +1,64 @@
> +#ifndef _LINUX_RCUSYNC_H_
> +#define _LINUX_RCUSYNC_H_
> +
> +#include <linux/wait.h>
> +#include <linux/rcupdate.h>
> +
> +struct rcu_sync_struct {
> + int gp_state;
> + int gp_count;
> + wait_queue_head_t gp_wait;
> +
> + int cb_state;
> + struct rcu_head cb_head;
> +
> + void (*sync)(void);
> + void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
> +};
> +
> +#define ___RCU_SYNC_INIT(name) \
> + .gp_state = 0, \
> + .gp_count = 0, \
> + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
> + .cb_state = 0
> +
> +#define __RCU_SCHED_SYNC_INIT(name) { \
> + ___RCU_SYNC_INIT(name), \
> + .sync = synchronize_sched, \
> + .call = call_rcu_sched, \
> +}
> +
> +#define __RCU_BH_SYNC_INIT(name) { \
> + ___RCU_SYNC_INIT(name), \
> + .sync = synchronize_rcu_bh, \
> + .call = call_rcu_bh, \
> +}
> +
> +#define __RCU_SYNC_INIT(name) { \
> + ___RCU_SYNC_INIT(name), \
> + .sync = synchronize_rcu, \
> + .call = call_rcu, \
> +}
> +
> +#define DEFINE_RCU_SCHED_SYNC(name) \
> + struct rcu_sync_struct name = __RCU_SCHED_SYNC_INIT(name)
> +
> +#define DEFINE_RCU_BH_SYNC(name) \
> + struct rcu_sync_struct name = __RCU_BH_SYNC_INIT(name)
> +
> +#define DEFINE_RCU_SYNC(name) \
> + struct rcu_sync_struct name = __RCU_SYNC_INIT(name)
> +
> +static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
> +{
> + return !rss->gp_state; /* GP_IDLE */
> +}
Hi, All
We may need to use ACCESS_ONCE() here to keep the compiler from accessing it multiple times.
It would be better to write: return ACCESS_ONCE(rss->gp_state) == GP_IDLE;
(My comments continue below, interleaved with the patch, until the "Thanks".)
> +
> +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
> +
> +extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
> +extern void rcu_sync_enter(struct rcu_sync_struct *);
> +extern void rcu_sync_exit(struct rcu_sync_struct *);
> +
> +#endif /* _LINUX_RCUSYNC_H_ */
> +
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -10,7 +10,8 @@ obj-y = fork.o exec_domain.o panic.o
> kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \
> hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
> notifier.o ksysfs.o cred.o reboot.o \
> - async.o range.o groups.o lglock.o smpboot.o
> + async.o range.o groups.o lglock.o smpboot.o \
> + rcusync.o
>
> ifdef CONFIG_FUNCTION_TRACER
> # Do not trace debug files and internal ftrace files
> --- /dev/null
> +++ b/kernel/rcusync.c
> @@ -0,0 +1,108 @@
> +
> +#include <linux/rcusync.h>
> +#include <linux/sched.h>
> +
> +enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
> +enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
> +
> +#define rss_lock gp_wait.lock
> +
> +void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
> +{
> + memset(rss, 0, sizeof(*rss));
> + init_waitqueue_head(&rss->gp_wait);
> +
> + switch (type) {
> + case RCU_SYNC:
> + rss->sync = synchronize_rcu;
> + rss->call = call_rcu;
> + break;
> +
> + case RCU_SCHED_SYNC:
> + rss->sync = synchronize_sched;
> + rss->call = call_rcu_sched;
> + break;
> +
> + case RCU_BH_SYNC:
> + rss->sync = synchronize_rcu_bh;
> + rss->call = call_rcu_bh;
> + break;
> + }
> +}
> +
> +void rcu_sync_enter(struct rcu_sync_struct *rss)
> +{
> + bool need_wait, need_sync;
> +
> + spin_lock_irq(&rss->rss_lock);
> + need_wait = rss->gp_count++;
> + need_sync = rss->gp_state == GP_IDLE;
I suggest "need_wait = rss->gp_state == GP_PENDING;" here.
> + if (need_sync)
> + rss->gp_state = GP_PENDING;
> + spin_unlock_irq(&rss->rss_lock);
> +
> + BUG_ON(need_wait && need_sync);
> +
> + if (need_sync) {
> + rss->sync();
> + rss->gp_state = GP_PASSED;
> + wake_up_all(&rss->gp_wait);
> + } else if (need_wait) {
> + wait_event(rss->gp_wait, rss->gp_state == GP_PASSED);
A memory barrier is required here, because rss->gp_state is read without rss_lock held.
CPU1                    CPU2                            CPU3
rcu_read_lock()
rcu_sync_is_idle()
                        rcu_sync_enter()
                        rss->sync()                     rcu_sync_enter()
/* XXXX */
rcu_read_unlock()
                        rss->sync() returns
                        rss->gp_state = GP_PASSED;
                                                        wait_event() (fastpath)
                                                        /* code here may be executed at XXXX due to CPU reordering */
> + } else {
> + /*
> + * Possible when there's a pending CB from a rcu_sync_exit().
> + * Nobody has yet been allowed the 'fast' path and thus we can
> + * avoid doing any sync(). The callback will get 'dropped'.
> + */
> + BUG_ON(rss->gp_state != GP_PASSED);
A memory barrier is required here as well, for the same reason as above.
> + }
> +}
> +
> +static void rcu_sync_func(struct rcu_head *rcu)
> +{
> + struct rcu_sync_struct *rss =
> + container_of(rcu, struct rcu_sync_struct, cb_head);
> + unsigned long flags;
> +
> +
> + BUG_ON(rss->gp_state != GP_PASSED);
> + BUG_ON(rss->cb_state == CB_IDLE);
> +
> + spin_lock_irqsave(&rss->rss_lock, flags);
> + if (rss->gp_count) {
> + /*
> + * A new rcu_sync_begin() has happened; drop the callback.
> + */
> + rss->cb_state = CB_IDLE;
> + } else if (rss->cb_state == CB_REPLAY) {
> + /*
> + * A new rcu_sync_exit() has happened; requeue the callback
> + * to catch a later GP.
> + */
> + rss->cb_state = CB_PENDING;
> + rss->call(&rss->cb_head, rcu_sync_func);
> + } else {
> + /*
> + * We're at least a GP after rcu_sync_exit(); eveybody will now
s/eveybody/everybody/
Please add
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Thanks,
Lai
> + * have observed the write side critical section. Let 'em rip!.
> + */
> + rss->cb_state = CB_IDLE;
> + rss->gp_state = GP_IDLE;
> + }
> + spin_unlock_irqrestore(&rss->rss_lock, flags);
> +}
> +
> +void rcu_sync_exit(struct rcu_sync_struct *rss)
> +{
> + spin_lock_irq(&rss->rss_lock);
> + if (!--rss->gp_count) {
> + if (rss->cb_state == CB_IDLE) {
> + rss->cb_state = CB_PENDING;
> + rss->call(&rss->cb_head, rcu_sync_func);
> + } else if (rss->cb_state == CB_PENDING) {
> + rss->cb_state = CB_REPLAY;
> + }
> + }
> + spin_unlock_irq(&rss->rss_lock);
> +}
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
next prev parent reply other threads:[~2013-10-17 2:52 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-08 10:25 [PATCH 0/6] Optimize the cpu hotplug locking -v2 Peter Zijlstra
2013-10-08 10:25 ` [PATCH 1/6] hotplug: Optimize {get,put}_online_cpus() Peter Zijlstra
2013-10-08 15:08 ` Rik van Riel
2013-10-10 5:47 ` Andrew Morton
2013-10-10 11:06 ` Oleg Nesterov
2013-10-10 14:55 ` Paul E. McKenney
2013-10-08 10:25 ` [PATCH 2/6] rcu: Create rcu_sync infrastructure Peter Zijlstra
2013-10-08 20:40 ` Jonathan Corbet
2013-10-09 19:52 ` Peter Zijlstra
2013-10-17 2:56 ` Lai Jiangshan [this message]
2013-10-17 10:36 ` Srikar Dronamraju
2013-10-08 10:25 ` [PATCH 3/6] hotplug: Optimize cpu_hotplug_{begin,done}() using rcu_sync Peter Zijlstra
2013-10-08 16:28 ` Paul E. McKenney
2013-10-08 10:25 ` [PATCH 4/6] rcusync: Introduce struct rcu_sync_ops Peter Zijlstra
2013-10-08 16:30 ` Paul E. McKenney
2013-10-17 2:07 ` Lai Jiangshan
[not found] ` <20131017154228.GL4553@linux.vnet.ibm.com>
2013-10-18 1:23 ` Lai Jiangshan
2013-10-18 12:10 ` Oleg Nesterov
2013-10-20 16:58 ` Paul E. McKenney
2013-10-08 10:25 ` [PATCH 5/6] rcusync: Add the CONFIG_PROVE_RCU checks Peter Zijlstra
2013-10-08 16:30 ` Paul E. McKenney
2013-10-08 10:25 ` [PATCH 6/6] rcusync: Introduce rcu_sync_dtor() Peter Zijlstra
2013-10-08 16:32 ` Paul E. McKenney
2013-10-08 15:27 ` [PATCH 0/6] Optimize the cpu hotplug locking -v2 Oleg Nesterov
2013-10-08 15:38 ` Peter Zijlstra
2013-10-10 5:50 ` Andrew Morton
2013-10-10 6:27 ` Ingo Molnar
2013-10-10 6:34 ` Andrew Morton
2013-10-10 7:27 ` Ingo Molnar
2013-10-10 7:33 ` Andrew Morton
2013-10-10 7:45 ` Ingo Molnar
2013-10-10 12:19 ` Peter Zijlstra
2013-10-10 14:57 ` Ingo Molnar
2013-10-10 15:21 ` Peter Zijlstra
2013-10-10 15:36 ` Oleg Nesterov
2013-10-10 16:50 ` Ingo Molnar
2013-10-10 17:13 ` Paul E. McKenney
2013-10-10 17:35 ` Ingo Molnar
2013-10-10 18:35 ` Peter Zijlstra
2013-10-10 15:26 ` Oleg Nesterov
2013-10-10 16:00 ` Andrew Morton
2013-10-10 16:36 ` Steven Rostedt
2013-10-10 16:43 ` Andrew Morton
2013-10-10 16:53 ` Peter Zijlstra
2013-10-10 17:13 ` Steven Rostedt
2013-10-10 17:48 ` Andrew Morton
2013-10-10 18:10 ` Linus Torvalds
2013-10-10 18:43 ` Steven Rostedt
2013-10-10 18:50 ` Peter Zijlstra
2013-10-10 19:15 ` Paul E. McKenney
2013-10-10 19:00 ` Linus Torvalds
2013-10-10 18:46 ` Peter Zijlstra
2013-10-10 18:34 ` Peter Zijlstra
2013-10-10 18:49 ` Linus Torvalds
2013-10-10 19:04 ` Steven Rostedt
2013-10-10 19:16 ` Linus Torvalds
2013-10-10 19:34 ` Peter Zijlstra
2013-10-10 19:34 ` Steven Rostedt
2013-10-11 6:09 ` Ingo Molnar
2013-10-11 12:38 ` Peter Zijlstra
2013-10-11 18:25 ` Oleg Nesterov
2013-10-11 20:48 ` Peter Zijlstra
2013-10-12 17:06 ` Oleg Nesterov
2013-10-14 9:05 ` Peter Zijlstra
2013-10-14 9:23 ` Paul E. McKenney
2013-10-15 1:01 ` Paul E. McKenney
2013-10-17 16:49 ` [tip:sched/core] sched: Remove get_online_cpus() usage tip-bot for Peter Zijlstra
2013-10-10 17:39 ` [PATCH 0/6] Optimize the cpu hotplug locking -v2 Oleg Nesterov
2013-10-10 16:52 ` Ingo Molnar
2013-10-10 17:44 ` Paul E. McKenney
2013-10-10 16:54 ` Oleg Nesterov
2013-10-10 19:04 ` Srivatsa S. Bhat
2013-10-10 18:52 ` Srivatsa S. Bhat
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=525F51DF.4080809@cn.fujitsu.com \
--to=laijs@cn.fujitsu.com \
--cc=aarcange@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=rostedt@goodmis.org \
--cc=srikar@linux.vnet.ibm.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.