From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org, linux-rt-users@vger.kernel.org,
	mingo@elte.hu, akpm@linux-foundation.org, dipankar@in.ibm.com,
	josht@linux.vnet.ibm.com, tytso@us.ibm.com, dvhltc@us.ibm.com,
	tglx@linutronix.de, a.p.zijlstra@chello.nl, bunk@kernel.org,
	ego@in.ibm.com, oleg@tv-sign.ru, srostedt@redhat.com
Subject: Re: [PATCH RFC 3/9] RCU: Preemptible RCU
Date: Fri, 21 Sep 2007 16:03:43 -0700
Message-ID: <20070921230343.GE9059@linux.vnet.ibm.com>
In-Reply-To: <20070921152048.GF15697@goodmis.org>

On Fri, Sep 21, 2007 at 11:20:48AM -0400, Steven Rostedt wrote:
> On Mon, Sep 10, 2007 at 11:34:12AM -0700, Paul E. McKenney wrote:
> > +
> > +/*
> > + * PREEMPT_RCU data structures.
> > + */
> > +
> > +#define GP_STAGES 4
> > +struct rcu_data {
> > +	spinlock_t	lock;		/* Protect rcu_data fields. */
> > +	long		completed;	/* Number of last completed batch. */
> > +	int		waitlistcount;
> > +	struct tasklet_struct rcu_tasklet;
> > +	struct rcu_head *nextlist;
> > +	struct rcu_head **nexttail;
> > +	struct rcu_head *waitlist[GP_STAGES];
> > +	struct rcu_head **waittail[GP_STAGES];
> > +	struct rcu_head *donelist;
> > +	struct rcu_head **donetail;
> > +#ifdef CONFIG_RCU_TRACE
> > +	struct rcupreempt_trace trace;
> > +#endif /* #ifdef CONFIG_RCU_TRACE */
> > +};
> > +struct rcu_ctrlblk {
> > +	spinlock_t	fliplock;	/* Protect state-machine transitions. */
> > +	long		completed;	/* Number of last completed batch. */
> > +};
> > +static DEFINE_PER_CPU(struct rcu_data, rcu_data);
> > +static struct rcu_ctrlblk rcu_ctrlblk = {
> > +	.fliplock = SPIN_LOCK_UNLOCKED,
> > +	.completed = 0,
> > +};
> > +static DEFINE_PER_CPU(int [2], rcu_flipctr) = { 0, 0 };
> > +
> > +/*
> > + * States for rcu_try_flip() and friends.
> > + */
> > +
> > +enum rcu_try_flip_states {
> > +	rcu_try_flip_idle_state,	/* "I" */
> > +	rcu_try_flip_waitack_state, 	/* "A" */
> > +	rcu_try_flip_waitzero_state,	/* "Z" */
> > +	rcu_try_flip_waitmb_state	/* "M" */
> > +};
> > +static enum rcu_try_flip_states rcu_try_flip_state = rcu_try_flip_idle_state;
> > +#ifdef CONFIG_RCU_TRACE
> > +static char *rcu_try_flip_state_names[] =
> > +	{ "idle", "waitack", "waitzero", "waitmb" };
> > +#endif /* #ifdef CONFIG_RCU_TRACE */
> 
> [snip]
> 
> > +/*
> > + * If a global counter flip has occurred since the last time that we
> > + * advanced callbacks, advance them.  Hardware interrupts must be
> > + * disabled when calling this function.
> > + */
> > +static void __rcu_advance_callbacks(struct rcu_data *rdp)
> > +{
> > +	int cpu;
> > +	int i;
> > +	int wlc = 0;
> > +
> > +	if (rdp->completed != rcu_ctrlblk.completed) {
> > +		if (rdp->waitlist[GP_STAGES - 1] != NULL) {
> > +			*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
> > +			rdp->donetail = rdp->waittail[GP_STAGES - 1];
> > +			RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
> > +		}
> > +		for (i = GP_STAGES - 2; i >= 0; i--) {
> > +			if (rdp->waitlist[i] != NULL) {
> > +				rdp->waitlist[i + 1] = rdp->waitlist[i];
> > +				rdp->waittail[i + 1] = rdp->waittail[i];
> > +				wlc++;
> > +			} else {
> > +				rdp->waitlist[i + 1] = NULL;
> > +				rdp->waittail[i + 1] =
> > +					&rdp->waitlist[i + 1];
> > +			}
> > +		}
> > +		if (rdp->nextlist != NULL) {
> > +			rdp->waitlist[0] = rdp->nextlist;
> > +			rdp->waittail[0] = rdp->nexttail;
> > +			wlc++;
> > +			rdp->nextlist = NULL;
> > +			rdp->nexttail = &rdp->nextlist;
> > +			RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
> > +		} else {
> > +			rdp->waitlist[0] = NULL;
> > +			rdp->waittail[0] = &rdp->waitlist[0];
> > +		}
> > +		rdp->waitlistcount = wlc;
> > +		rdp->completed = rcu_ctrlblk.completed;
> > +	}
> > +
> > +	/*
> > +	 * Check to see if this CPU needs to report that it has seen
> > +	 * the most recent counter flip, thereby declaring that all
> > +	 * subsequent rcu_read_lock() invocations will respect this flip.
> > +	 */
> > +
> > +	cpu = raw_smp_processor_id();
> > +	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
> > +		smp_mb();  /* Subsequent counter accesses must see new value */
> > +		per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
> > +		smp_mb();  /* Subsequent RCU read-side critical sections */
> > +			   /*  seen -after- acknowledgement. */
> > +	}
> > +}
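
To make the list motion above concrete, here is a minimal user-space
sketch of the same shuffle.  This is a model only: the model_* names
are illustrative rather than the kernel's, and it drops the locking,
tracing, and per-CPU machinery entirely.  Each call to model_advance()
stands in for one observed counter flip.

#include <stdio.h>

#define GP_STAGES 4

struct rcu_head {
	struct rcu_head *next;
	void (*func)(struct rcu_head *rh);
};

struct model_rcu_data {
	struct rcu_head *nextlist;
	struct rcu_head **nexttail;
	struct rcu_head *waitlist[GP_STAGES];
	struct rcu_head **waittail[GP_STAGES];
	struct rcu_head *donelist;
	struct rcu_head **donetail;
};

static void model_init(struct model_rcu_data *rdp)
{
	int i;

	rdp->nextlist = NULL;
	rdp->nexttail = &rdp->nextlist;
	for (i = 0; i < GP_STAGES; i++) {
		rdp->waitlist[i] = NULL;
		rdp->waittail[i] = &rdp->waitlist[i];
	}
	rdp->donelist = NULL;
	rdp->donetail = &rdp->donelist;
}

/* call_rcu() analog: O(1) append via the tail pointer. */
static void model_enqueue(struct model_rcu_data *rdp, struct rcu_head *head)
{
	head->next = NULL;
	*rdp->nexttail = head;
	rdp->nexttail = &head->next;
}

/* One counter flip: the oldest stage drains to done, the rest shift. */
static void model_advance(struct model_rcu_data *rdp)
{
	int i;

	if (rdp->waitlist[GP_STAGES - 1] != NULL) {
		*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
		rdp->donetail = rdp->waittail[GP_STAGES - 1];
	}
	for (i = GP_STAGES - 2; i >= 0; i--) {
		if (rdp->waitlist[i] != NULL) {
			rdp->waitlist[i + 1] = rdp->waitlist[i];
			rdp->waittail[i + 1] = rdp->waittail[i];
		} else {
			rdp->waitlist[i + 1] = NULL;
			rdp->waittail[i + 1] = &rdp->waitlist[i + 1];
		}
	}
	if (rdp->nextlist != NULL) {
		rdp->waitlist[0] = rdp->nextlist;
		rdp->waittail[0] = rdp->nexttail;
		rdp->nextlist = NULL;
		rdp->nexttail = &rdp->nextlist;
	} else {
		rdp->waitlist[0] = NULL;
		rdp->waittail[0] = &rdp->waitlist[0];
	}
}

static void noop(struct rcu_head *rh) { (void)rh; }

int main(void)
{
	struct model_rcu_data rd;
	struct rcu_head h = { .next = NULL, .func = noop };
	int flips = 0;

	model_init(&rd);
	model_enqueue(&rd, &h);
	while (rd.donelist == NULL) {
		model_advance(&rd);
		flips++;
	}
	printf("callback reached donelist after %d flips\n", flips);
	return 0;
}

This prints 5: one flip to move the callback from next to wait[0],
plus GP_STAGES more to drain it through to done.
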
> 
> [snip]
> 
> > +/*
> > + * Attempt a single flip of the counters.  Remember, a single flip does
> > + * -not- constitute a grace period.  Instead, the interval between
> > + * at least three consecutive flips is a grace period.
> > + *
> > + * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
> > + * on a large SMP, they might want to use a hierarchical organization of
> > + * the per-CPU-counter pairs.
> > + */
> > +static void rcu_try_flip(void)
> > +{
> > +	unsigned long oldirq;
> > +
> > +	RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
> > +	if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, oldirq))) {
> > +		RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Take the next transition(s) through the RCU grace-period
> > +	 * flip-counter state machine.
> > +	 */
> > +
> > +	switch (rcu_try_flip_state) {
> > +	case rcu_try_flip_idle_state:
> > +		if (rcu_try_flip_idle())
> > +			rcu_try_flip_state = rcu_try_flip_waitack_state;
> > +		break;
> > +	case rcu_try_flip_waitack_state:
> > +		if (rcu_try_flip_waitack())
> > +			rcu_try_flip_state = rcu_try_flip_waitzero_state;
> > +		break;
> > +	case rcu_try_flip_waitzero_state:
> > +		if (rcu_try_flip_waitzero())
> > +			rcu_try_flip_state = rcu_try_flip_waitmb_state;
> > +		break;
> > +	case rcu_try_flip_waitmb_state:
> > +		if (rcu_try_flip_waitmb())
> > +			rcu_try_flip_state = rcu_try_flip_idle_state;
> > +	}
> > +	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, oldirq);
> > +}
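
In the same hedged spirit, here is the control flow of the state
machine above as a stand-alone sketch.  The four predicates are
trivial stand-ins that always succeed, whereas the real
rcu_try_flip_idle() and friends poll per-CPU state and typically fail
a few times before a state advances:

#include <stdbool.h>
#include <stdio.h>

#define GP_STAGES 4

enum flip_state { FLIP_IDLE, FLIP_WAITACK, FLIP_WAITZERO, FLIP_WAITMB };

/* Stand-ins for rcu_try_flip_idle()/_waitack()/_waitzero()/_waitmb(). */
static bool flip_counters(void)  { return true; } /* start a new flip  */
static bool cpus_acked(void)     { return true; } /* all CPUs saw flip */
static bool old_count_zero(void) { return true; } /* old readers done  */
static bool cpus_did_mb(void)    { return true; } /* barriers executed */

/* One attempt; a state whose condition fails is retried next call. */
static bool try_flip_step(enum flip_state *s)
{
	switch (*s) {
	case FLIP_IDLE:
		if (flip_counters())
			*s = FLIP_WAITACK;
		break;
	case FLIP_WAITACK:
		if (cpus_acked())
			*s = FLIP_WAITZERO;
		break;
	case FLIP_WAITZERO:
		if (old_count_zero())
			*s = FLIP_WAITMB;
		break;
	case FLIP_WAITMB:
		if (cpus_did_mb()) {
			*s = FLIP_IDLE;
			return true;	/* one full I->A->Z->M circuit */
		}
		break;
	}
	return false;
}

int main(void)
{
	enum flip_state s = FLIP_IDLE;
	int steps = 0, flips = 0;

	/* GP_STAGES + 1 flips retire a callback (see the model above). */
	while (flips < GP_STAGES + 1) {
		steps++;
		if (try_flip_step(&s))
			flips++;
	}
	printf("%d flips took %d state transitions\n", flips, steps);
	return 0;
}

With the stand-ins always succeeding, this prints "5 flips took 20
state transitions", which is exactly the count you work out below.
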
> 
> Paul,
> 
> Looking further into this, I still think this is a bit of overkill. We
> go through 20 states from call_rcu to list->func().
> 
> On call_rcu we put our stuff on the next list. Before we move stuff from
> next to wait, we need to go through 4 states. So we have
> 
> next -> 4 states -> wait[0] -> 4 states -> wait[1] -> 4 states ->
> wait[2] -> 4 states -> wait[3] -> 4 states -> done.
> 
> That's 20 states that we go through from the time we add our function to
> the list to the time it actually gets called. Do we really need the 4
> wait lists?
> 
> Seems a bit overkill to me.
> 
> What am I missing?

"Nothing kills like overkill!!!"  ;-)

Seriously, I do expect to be able to squeeze this down over time, but
feel the need to be a bit on the cowardly side at the moment.

In any case, I will be looking at the scenarios more carefully.  If
it turns out that GP_STAGES can indeed be cranked down a bit, well,
that is an easy change!  I just fired off a POWER run with GP_STAGES
set to 3 and will let you know how it goes.
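
(Spelling out what that buys, by your arithmetic above: with GP_STAGES
set to 3 the pipeline is next -> wait[0] -> wait[1] -> wait[2] -> done,
so four list moves gated by four flip states apiece, or 16 state
transitions instead of 20.)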

						Thanx, Paul

Thread overview: 58+ messages
2007-09-10 18:30 [PATCH RFC 0/9] RCU: Preemptible RCU Paul E. McKenney
2007-09-10 18:32 ` [PATCH RFC 1/9] RCU: Split API to permit multiple RCU implementations Paul E. McKenney
2007-09-21  4:14   ` Steven Rostedt
2007-09-10 18:33 ` [PATCH RFC 2/9] RCU: Fix barriers Paul E. McKenney
2007-09-10 18:34 ` [PATCH RFC 3/9] RCU: Preemptible RCU Paul E. McKenney
2007-09-21  4:17   ` Steven Rostedt
2007-09-21  5:50     ` Paul E. McKenney
2007-09-21  5:56     ` Dipankar Sarma
2007-09-21 14:40   ` Steven Rostedt
2007-09-21 15:46     ` Peter Zijlstra
2007-09-21 22:06       ` Paul E. McKenney
2007-09-21 22:31       ` Steven Rostedt
2007-09-21 22:44         ` Paul E. McKenney
2007-09-21 23:23           ` Steven Rostedt
2007-09-21 23:44             ` Paul E. McKenney
2007-09-22  0:26     ` Paul E. McKenney
2007-09-22  1:15       ` Steven Rostedt
2007-09-22  1:53         ` Paul E. McKenney
2007-09-22  3:15           ` Steven Rostedt
2007-09-22  4:07             ` Paul E. McKenney
2007-09-21 15:20   ` Steven Rostedt
2007-09-21 23:03     ` Paul E. McKenney [this message]
2007-09-22  0:32       ` Paul E. McKenney
2007-09-22  1:19         ` Steven Rostedt
2007-09-22  1:43           ` Paul E. McKenney
2007-09-22  2:56             ` Steven Rostedt
2007-09-22  4:10               ` Paul E. McKenney
2007-09-23 17:38   ` Oleg Nesterov
2007-09-24  0:15     ` Paul E. McKenney
2007-09-26 15:13       ` Oleg Nesterov
2007-09-27 15:46         ` Paul E. McKenney
2007-09-28 14:47           ` Oleg Nesterov
2007-09-28 18:57             ` Paul E. McKenney
2007-09-30 16:31               ` Oleg Nesterov
2007-09-30 23:02                 ` Davide Libenzi
2007-10-01  1:37                   ` Paul E. McKenney
2007-10-01 18:44                     ` Davide Libenzi
2007-10-01 19:21                       ` Paul E. McKenney
2007-10-01 22:09                         ` Davide Libenzi
2007-10-01 22:24                           ` Paul E. McKenney
2007-10-02 18:02                     ` Oleg Nesterov
2007-10-01  1:20                 ` Paul E. McKenney
2007-09-10 18:35 ` [PATCH RFC 4/9] RCU: synchronize_sched() workaround for CPU hotplug Paul E. McKenney
2007-09-10 18:36 ` [PATCH RFC 5/9] RCU: CPU hotplug support for preemptible RCU Paul E. McKenney
2007-09-30 16:38   ` Oleg Nesterov
2007-10-01  1:41     ` Paul E. McKenney
2007-09-10 18:39 ` [PATCH RFC 6/9] RCU priority boosting " Paul E. McKenney
2007-09-28 22:56   ` Gautham R Shenoy
2007-09-28 23:05     ` Steven Rostedt
2007-09-30  3:11       ` Paul E. McKenney
2007-10-05 11:46   ` Gautham R Shenoy
2007-10-05 12:24     ` Steven Rostedt
2007-10-05 13:21       ` Gautham R Shenoy
2007-10-05 14:07         ` Paul E. McKenney
2007-09-10 18:39 ` [PATCH RFC 7/9] RCU: rcutorture testing for RCU priority boosting Paul E. McKenney
2007-09-10 18:41 ` [PATCH RFC 8/9] RCU: Make RCU priority boosting consume less power Paul E. McKenney
2007-09-10 18:42 ` [PATCH RFC 9/9] RCU: preemptible documentation and comment cleanups Paul E. McKenney
2007-09-10 18:44 ` [PATCH RFC 0/9] RCU: Preemptible RCU Ingo Molnar
