Re: [PATCH RFC 01/16] prcu: Add PRCU implementation

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: lianglihao@huawei.com
Cc: guohanjun@huawei.com, heng.z@huawei.com, hb.chen@huawei.com,
	lihao.liang@gmail.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH RFC 01/16] prcu: Add PRCU implementation
Date: Wed, 24 Jan 2018 22:16:18 -0800	[thread overview]
Message-ID: <20180125061618.GU3741@linux.vnet.ibm.com> (raw)
In-Reply-To: <1516694381-20333-2-git-send-email-lianglihao@huawei.com>

On Tue, Jan 23, 2018 at 03:59:26PM +0800, lianglihao@huawei.com wrote:
> From: Heng Zhang <heng.z@huawei.com>
> 
> This RCU implementation (PRCU) is based on a fast consensus protocol
> published in the following paper:
> 
> Fast Consensus Using Bounded Staleness for Scalable Read-mostly Synchronization.
> Haibo Chen, Heng Zhang, Ran Liu, Binyu Zang, and Haibing Guan.
> IEEE Transactions on Parallel and Distributed Systems (TPDS), 2016.
> https://dl.acm.org/citation.cfm?id=3024114.3024143
> 
> Signed-off-by: Heng Zhang <heng.z@huawei.com>
> Signed-off-by: Lihao Liang <lianglihao@huawei.com>

A few comments and questions interspersed.

							Thanx, Paul

> ---
>  include/linux/prcu.h |  37 +++++++++++++++
>  kernel/rcu/Makefile  |   2 +-
>  kernel/rcu/prcu.c    | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/sched/core.c  |   2 +
>  4 files changed, 165 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/prcu.h
>  create mode 100644 kernel/rcu/prcu.c
> 
> diff --git a/include/linux/prcu.h b/include/linux/prcu.h
> new file mode 100644
> index 00000000..653b4633
> --- /dev/null
> +++ b/include/linux/prcu.h
> @@ -0,0 +1,37 @@
> +#ifndef __LINUX_PRCU_H
> +#define __LINUX_PRCU_H
> +
> +#include <linux/atomic.h>
> +#include <linux/mutex.h>
> +#include <linux/wait.h>
> +
> +#define CONFIG_PRCU
> +
> +struct prcu_local_struct {
> +	unsigned int locked;
> +	unsigned int online;
> +	unsigned long long version;
> +};
> +
> +struct prcu_struct {
> +	atomic64_t global_version;
> +	atomic_t active_ctr;
> +	struct mutex mtx;
> +	wait_queue_head_t wait_q;
> +};
> +
> +#ifdef CONFIG_PRCU
> +void prcu_read_lock(void);
> +void prcu_read_unlock(void);
> +void synchronize_prcu(void);
> +void prcu_note_context_switch(void);
> +
> +#else /* #ifdef CONFIG_PRCU */
> +
> +#define prcu_read_lock() do {} while (0)
> +#define prcu_read_unlock() do {} while (0)
> +#define synchronize_prcu() do {} while (0)
> +#define prcu_note_context_switch() do {} while (0)

If CONFIG_PRCU=n and some code is built that uses PRCU, shouldn't you
get a build error rather than an error-free but inoperative PRCU?

Of course, Peter's question about the purpose of the patch set applies
here as well.

> +
> +#endif /* #ifdef CONFIG_PRCU */
> +#endif /* __LINUX_PRCU_H */
> diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
> index 23803c7d..8791419c 100644
> --- a/kernel/rcu/Makefile
> +++ b/kernel/rcu/Makefile
> @@ -2,7 +2,7 @@
>  # and is generally not a function of system call inputs.
>  KCOV_INSTRUMENT := n
> 
> -obj-y += update.o sync.o
> +obj-y += update.o sync.o prcu.o
>  obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
>  obj-$(CONFIG_TREE_SRCU) += srcutree.o
>  obj-$(CONFIG_TINY_SRCU) += srcutiny.o
> diff --git a/kernel/rcu/prcu.c b/kernel/rcu/prcu.c
> new file mode 100644
> index 00000000..a00b9420
> --- /dev/null
> +++ b/kernel/rcu/prcu.c
> @@ -0,0 +1,125 @@
> +#include <linux/smp.h>
> +#include <linux/prcu.h>
> +#include <linux/percpu.h>
> +#include <linux/compiler.h>
> +#include <linux/sched.h>
> +
> +#include <asm/barrier.h>
> +
> +DEFINE_PER_CPU_SHARED_ALIGNED(struct prcu_local_struct, prcu_local);
> +
> +struct prcu_struct global_prcu = {
> +	.global_version = ATOMIC64_INIT(0),
> +	.active_ctr = ATOMIC_INIT(0),
> +	.mtx = __MUTEX_INITIALIZER(global_prcu.mtx),
> +	.wait_q = __WAIT_QUEUE_HEAD_INITIALIZER(global_prcu.wait_q)
> +};
> +struct prcu_struct *prcu = &global_prcu;
> +
> +static inline void prcu_report(struct prcu_local_struct *local)
> +{
> +	unsigned long long global_version;
> +	unsigned long long local_version;
> +
> +	global_version = atomic64_read(&prcu->global_version);
> +	local_version = local->version;
> +	if (global_version > local_version)
> +		cmpxchg(&local->version, local_version, global_version);
> +}
> +
> +void prcu_read_lock(void)
> +{
> +	struct prcu_local_struct *local;
> +
> +	local = get_cpu_ptr(&prcu_local);
> +	if (!local->online) {
> +		WRITE_ONCE(local->online, 1);
> +		smp_mb();
> +	}
> +
> +	local->locked++;
> +	put_cpu_ptr(&prcu_local);
> +}
> +EXPORT_SYMBOL(prcu_read_lock);
> +
> +void prcu_read_unlock(void)
> +{
> +	int locked;
> +	struct prcu_local_struct *local;
> +
> +	barrier();
> +	local = get_cpu_ptr(&prcu_local);
> +	locked = local->locked;
> +	if (locked) {
> +		local->locked--;
> +		if (locked == 1)
> +			prcu_report(local);

Is ordering important here?  It looks to me that the compiler could
rearrange some of the accesses within prcu_report() with the local->locked
decrement.  There appears to be some potential for load and store tearing,
though perhaps you have verified that your compiler avoids this on
the architecture that you are using.

> +		put_cpu_ptr(&prcu_local);
> +	} else {

Hmmm...  We get here if the RCU read-side critical section was preempted.
If none of them are preempted, ->active_ctr remains zero.

> +		put_cpu_ptr(&prcu_local);
> +		if (!atomic_dec_return(&prcu->active_ctr))
> +			wake_up(&prcu->wait_q);
> +	}
> +}
> +EXPORT_SYMBOL(prcu_read_unlock);
> +
> +static void prcu_handler(void *info)
> +{
> +	struct prcu_local_struct *local;
> +
> +	local = this_cpu_ptr(&prcu_local);
> +	if (!local->locked)
> +		WRITE_ONCE(local->version, atomic64_read(&prcu->global_version));
> +}
> +
> +void synchronize_prcu(void)
> +{
> +	int cpu;
> +	cpumask_t cpus;
> +	unsigned long long version;
> +	struct prcu_local_struct *local;
> +
> +	version = atomic64_add_return(1, &prcu->global_version);
> +	mutex_lock(&prcu->mtx);
> +
> +	local = get_cpu_ptr(&prcu_local);
> +	local->version = version;
> +	put_cpu_ptr(&prcu_local);
> +
> +	cpumask_clear(&cpus);
> +	for_each_possible_cpu(cpu) {
> +		local = per_cpu_ptr(&prcu_local, cpu);
> +		if (!READ_ONCE(local->online))
> +			continue;
> +		if (READ_ONCE(local->version) < version) {

On 32-bit systems, given that ->version is long long, you might see
load tearing.  And on some 32-bit systems, the cmpxchg() in prcu_report()
might not build.

Or is the idea that only prcu_handler() updates ->version?  But in that
case, you wouldn't need the READ_ONCE() above.  What am I missing here?

> +			smp_call_function_single(cpu, prcu_handler, NULL, 0);
> +			cpumask_set_cpu(cpu, &cpus);
> +		}
> +	}
> +
> +	for_each_cpu(cpu, &cpus) {
> +		local = per_cpu_ptr(&prcu_local, cpu);
> +		while (READ_ONCE(local->version) < version)

This ->version read can also tear on some 32-bit systems, and this
one most definitely can race with the prcu_handler() above.  Does the
algorithm operate correctly in that case?  (It doesn't look that way
to me, but I might be missing something.) Or are 32-bit systems excluded?

> +			cpu_relax();
> +	}

I might be missing something, but I believe we need a memory barrier
here on non-TSO systems.  Without that, couldn't we miss a preemption?

> +
> +	if (atomic_read(&prcu->active_ctr))
> +		wait_event(prcu->wait_q, !atomic_read(&prcu->active_ctr));
> +
> +	mutex_unlock(&prcu->mtx);
> +}
> +EXPORT_SYMBOL(synchronize_prcu);
> +
> +void prcu_note_context_switch(void)
> +{
> +	struct prcu_local_struct *local;
> +
> +	local = get_cpu_ptr(&prcu_local);
> +	if (local->locked) {
> +		atomic_add(local->locked, &prcu->active_ctr);
> +		local->locked = 0;
> +	}
> +	local->online = 0;
> +	prcu_report(local);
> +	put_cpu_ptr(&prcu_local);
> +}
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 326d4f88..a308581b 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -15,6 +15,7 @@
>  #include <linux/init_task.h>
>  #include <linux/context_tracking.h>
>  #include <linux/rcupdate_wait.h>
> +#include <linux/prcu.h>
> 
>  #include <linux/blkdev.h>
>  #include <linux/kprobes.h>
> @@ -3383,6 +3384,7 @@ static void __sched notrace __schedule(bool preempt)
> 
>  	local_irq_disable();
>  	rcu_note_context_switch(preempt);
> +	prcu_note_context_switch();
> 
>  	/*
>  	 * Make sure that signal_pending_state()->signal_pending() below
> -- 
> 2.14.1.729.g59c0ea183
>

next prev parent reply	other threads:[~2018-01-25  6:30 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-23  7:59 [PATCH RFC 00/16] A new RCU implementation based on a fast consensus protocol lianglihao
2018-01-23  7:59 ` [PATCH RFC 01/16] prcu: Add PRCU implementation lianglihao
2018-01-24 11:26   ` Peter Zijlstra
2018-01-24 17:15     ` Lihao Liang
2018-01-24 20:19       ` Peter Zijlstra
2018-01-25  6:16   ` Paul E. McKenney [this message]
2018-01-25  7:30     ` Boqun Feng
2018-01-30  5:34       ` zhangheng (AC)
2018-01-30  6:40         ` Boqun Feng
2018-01-30 10:42           ` zhangheng (AC)
2018-01-27  7:35     ` Lihao Liang
2018-01-30  3:58     ` zhangheng (AC)
2018-01-29  9:10   ` Lai Jiangshan
2018-01-30  6:21     ` zhangheng (AC)
2018-01-23  7:59 ` [PATCH RFC 02/16] rcutorture: Add PRCU rcu_torture_ops lianglihao
2018-01-23  7:59 ` [PATCH RFC 03/16] rcutorture: Add PRCU test config files lianglihao
2018-01-25  6:27   ` Paul E. McKenney
2018-01-23  7:59 ` [PATCH RFC 04/16] rcuperf: Add PRCU rcu_perf_ops lianglihao
2018-01-23  7:59 ` [PATCH RFC 05/16] rcuperf: Add PRCU test config files lianglihao
2018-01-23  7:59 ` [PATCH RFC 06/16] rcuperf: Set gp_exp to true for tests to run lianglihao
2018-01-25  6:18   ` Paul E. McKenney
2018-01-26  8:33     ` Lihao Liang
2018-01-23  7:59 ` [PATCH RFC 07/16] prcu: Implement call_prcu() API lianglihao
2018-01-25  6:20   ` Paul E. McKenney
2018-01-26  8:44     ` Lihao Liang
2018-01-26 22:22       ` Paul E. McKenney
2018-01-23  7:59 ` [PATCH RFC 08/16] prcu: Implement PRCU callback processing lianglihao
2018-01-23  7:59 ` [PATCH RFC 09/16] prcu: Implement prcu_barrier() API lianglihao
2018-01-25  6:24   ` Paul E. McKenney
2018-01-23  7:59 ` [PATCH RFC 10/16] rcutorture: Test call_prcu() and prcu_barrier() lianglihao
2018-01-23  7:59 ` [PATCH RFC 11/16] rcutorture: Add basic ARM64 support to run scripts lianglihao
2018-01-23  7:59 ` [PATCH RFC 12/16] prcu: Add PRCU Kconfig parameter lianglihao
2018-01-23  7:59 ` [PATCH RFC 13/16] prcu: Comment source code lianglihao
2018-01-23  7:59 ` [PATCH RFC 14/16] rcuperf: Add config files with various CONFIG_NR_CPUS lianglihao
2018-01-23  7:59 ` [PATCH RFC 15/16] rcutorture: Add scripts to run experiments lianglihao
2018-01-25  6:28   ` Paul E. McKenney
2018-01-23  7:59 ` [PATCH RFC 16/16] Add GPLv2 license lianglihao
2018-01-25  5:53 ` [PATCH RFC 00/16] A new RCU implementation based on a fast consensus protocol Paul E. McKenney
2018-01-27  7:22   ` Lihao Liang
2018-01-27  7:57     ` Paul E. McKenney
2018-01-27  9:57       ` Lihao Liang
2018-01-27 23:46         ` Paul E. McKenney
2018-01-27 23:41       ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180125061618.GU3741@linux.vnet.ibm.com \
    --to=paulmck@linux.vnet.ibm.com \
    --cc=guohanjun@huawei.com \
    --cc=hb.chen@huawei.com \
    --cc=heng.z@huawei.com \
    --cc=lianglihao@huawei.com \
    --cc=lihao.liang@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.