Re: [PATCH v4 1/4] rcu: Reduce synchronize_rcu() latency

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Uladzislau Rezki <urezki@gmail.com>
To: "Paul E. McKenney" <paulmck@kernel.org>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>,
	RCU <rcu@vger.kernel.org>,
	Neeraj upadhyay <Neeraj.Upadhyay@amd.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	Hillf Danton <hdanton@sina.com>,
	Joel Fernandes <joel@joelfernandes.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>,
	Frederic Weisbecker <frederic@kernel.org>
Subject: Re: [PATCH v4 1/4] rcu: Reduce synchronize_rcu() latency
Date: Wed, 17 Jan 2024 13:26:19 +0100	[thread overview]
Message-ID: <ZafHa37MS_eYMEr6@pc636> (raw)
In-Reply-To: <8edf32ff-ea10-43f3-8820-b67f1691bad2@paulmck-laptop>

> > +/*
> > + * There are three lists for handling synchronize_rcu() users.
> > + * A first list corresponds to new coming users, second for users
> > + * which wait for a grace period and third is for which a grace
> > + * period is passed.
> > + */
> > +static struct sr_normal_state {
> > +	struct llist_head srs_next;	/* request a GP users. */
> > +	struct llist_head srs_wait;	/* wait for GP users. */
> > +	struct llist_head srs_done;	/* ready for GP users. */
> > +
> > +	/*
> > +	 * In order to add a batch of nodes to already
> > +	 * existing srs-done-list, a tail of srs-wait-list
> > +	 * is maintained.
> > +	 */
> > +	struct llist_node *srs_wait_tail;
> > +} sr;
> 
> Please put this in the rcu_state structure.  Having the separate structure
> is fine (it does group the fields nicely, plus you can take a pointer
> to it in the functions using this state), but it is good to have the
> state in one place.
> 
> Also, please add the data structures in a separate patch.  This might
> save someone a lot of time and effort should someone breaks the kernel
> in a way that depends on data-structure size.  It would be much easier
> for us if their bisection converged on the commit that adds the data
> structures instead of the commit that also adds a lot of code.
> 
I put the data under rcu_state in the patch-3 in this series. But i can
create a separate patch for this purpose. Should i split it or not?

> > +	/* Finally. */
> > +	complete(&rs->completion);
> > +}
> > +
> > +static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> > +{
> > +	struct llist_node *done, *rcu, *next;
> > +
> > +	done = llist_del_all(&sr.srs_done);
> > +	if (!done)
> > +		return;
> > +
> > +	llist_for_each_safe(rcu, next, done)
> > +		rcu_sr_normal_complete(rcu);
> > +}
> > +static DECLARE_WORK(sr_normal_gp_cleanup, rcu_sr_normal_gp_cleanup_work);
> 
> Why not put this into the sr_normal_state structure?  You can use
> __WORK_INITIALIZER() to initialize it, as is done in a number of other
> places in the kernel.
> 
It is not a big problem. I can move it under "rcu_state" also!

> > +/*
> > + * Helper function for rcu_gp_cleanup().
> > + */
> > +static void rcu_sr_normal_gp_cleanup(void)
> > +{
> > +	struct llist_node *head, *tail;
> > +
> > +	if (llist_empty(&sr.srs_wait))
> > +		return;
> > +
> > +	tail = READ_ONCE(sr.srs_wait_tail);
> > +	head = __llist_del_all(&sr.srs_wait);
> > +
> > +	if (head) {
> > +		/* Can be not empty. */
> > +		llist_add_batch(head, tail, &sr.srs_done);
> > +		queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
> > +	}
> > +}
> > +
> > +/*
> > + * Helper function for rcu_gp_init().
> > + */
> > +static void rcu_sr_normal_gp_init(void)
> > +{
> > +	struct llist_node *head, *tail;
> > +
> > +	if (llist_empty(&sr.srs_next))
> > +		return;
> > +
> > +	tail = llist_del_all(&sr.srs_next);
> > +	head = llist_reverse_order(tail);
> 
> Again, reversing the order is going to cause trouble on large systems.
> Let's please not do that.  (I could have sworn that this was not present
> in the last series...)
> 
> > +	/*
> > +	 * A waiting list of GP should be empty on this step,
> > +	 * since a GP-kthread, rcu_gp_init() -> gp_cleanup(),
> > +	 * rolls it over. If not, it is a BUG, warn a user.
> > +	 */
> > +	WARN_ON_ONCE(!llist_empty(&sr.srs_wait));
> > +
> > +	WRITE_ONCE(sr.srs_wait_tail, tail);
> > +	__llist_add_batch(head, tail, &sr.srs_wait);
> > +}
> > +
> > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > +{
> > +	llist_add((struct llist_node *) &rs->head, &sr.srs_next);
> > +}
> > +
> >  /*
> >   * Initialize a new grace period.  Return false if no grace period required.
> >   */
> > @@ -1456,6 +1556,7 @@ static noinline_for_stack bool rcu_gp_init(void)
> >  	/* Record GP times before starting GP, hence rcu_seq_start(). */
> >  	rcu_seq_start(&rcu_state.gp_seq);
> >  	ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
> > +	rcu_sr_normal_gp_init();
> >  	trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
> >  	rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
> >  	raw_spin_unlock_irq_rcu_node(rnp);
> > @@ -1825,6 +1926,9 @@ static noinline void rcu_gp_cleanup(void)
> >  	}
> >  	raw_spin_unlock_irq_rcu_node(rnp);
> >  
> > +	// Make synchronize_rcu() users aware of the end of old grace period.
> > +	rcu_sr_normal_gp_cleanup();
> > +
> >  	// If strict, make all CPUs aware of the end of the old grace period.
> >  	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
> >  		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
> > @@ -3561,6 +3665,38 @@ static int rcu_blocking_is_gp(void)
> >  	return true;
> >  }
> >  
> > +/*
> > + * Helper function for the synchronize_rcu() API.
> > + */
> > +static void synchronize_rcu_normal(void)
> > +{
> > +	struct rcu_synchronize rs;
> > +
> > +	if (!READ_ONCE(rcu_normal_wake_from_gp)) {
> > +		wait_rcu_gp(call_rcu_hurry);
> > +		return;
> > +	}
> > +
> > +	init_rcu_head_on_stack(&rs.head);
> > +	init_completion(&rs.completion);
> > +
> > +	/*
> > +	 * This code might be preempted, therefore take a GP
> > +	 * snapshot before adding a request.
> > +	 */
> > +	if (IS_ENABLED(CONFIG_RCU_SR_NORMAL_DEBUG_GP))
> > +		rs.head.func = (void *) get_state_synchronize_rcu();
> > +
> > +	rcu_sr_normal_add_req(&rs);
> > +
> > +	/* Kick a GP and start waiting. */
> > +	(void) start_poll_synchronize_rcu();
> 
> It is unfortunate that the debugging requires an extra timestamp.
> The ways I can think of to avoid this have problems, though.  If this
> thing was replicated per leaf rcu_node structure, the usual approach
> would be to protect it with that structure's ->lock.
> 
Hmm.. a per-node approach can be deployed later. As discussed earlier :)

Debugging part i do not follow, could you please elaborate a bit?

Thanks!

--
Uladzislau Rezki

next prev parent reply	other threads:[~2024-01-17 12:26 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-04 16:25 [PATCH v4 0/4] Reduce synchronize_rcu() latency(v4) Uladzislau Rezki (Sony)
2024-01-04 16:25 ` [PATCH v4 1/4] rcu: Reduce synchronize_rcu() latency Uladzislau Rezki (Sony)
2024-01-09 19:16   ` Kalesh Singh
2024-01-10  9:21     ` Uladzislau Rezki
2024-01-11 16:37       ` Kalesh Singh
2024-01-11 17:35         ` Uladzislau Rezki
2024-01-12 23:09   ` Frederic Weisbecker
2024-01-18 10:37     ` Uladzislau Rezki
2024-01-16 16:18   ` Paul E. McKenney
2024-01-17 12:26     ` Uladzislau Rezki [this message]
2024-01-19 15:24       ` Paul E. McKenney
2024-01-22 17:35         ` Uladzislau Rezki
2024-01-23 11:21           ` Paul E. McKenney
2024-01-04 16:25 ` [PATCH v4 2/4] rcu: Add a trace event for synchronize_rcu_normal() Uladzislau Rezki (Sony)
2024-01-12 23:20   ` Frederic Weisbecker
2024-01-15 12:14     ` Uladzislau Rezki
2024-01-04 16:25 ` [PATCH v4 3/4] rcu: Improve handling of synchronize_rcu() users Uladzislau Rezki (Sony)
2024-01-16 16:32   ` Paul E. McKenney
2024-01-04 16:25 ` [PATCH v4 4/4] rcu: Support direct wake-up " Uladzislau Rezki (Sony)
2024-01-13  9:19   ` Z qiang
2024-01-15 10:46     ` Uladzislau Rezki
2024-01-15 10:57       ` Uladzislau Rezki
2024-01-16  6:19         ` Z qiang
2024-01-27  7:07 ` [PATCH v4 0/4] Reduce synchronize_rcu() latency(v4) Paul E. McKenney
2024-01-29 16:23   ` Uladzislau Rezki
2024-01-29 19:43     ` Paul E. McKenney
2024-01-29 20:36       ` Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZafHa37MS_eYMEr6@pc636 \
    --to=urezki@gmail.com \
    --cc=Neeraj.Upadhyay@amd.com \
    --cc=boqun.feng@gmail.com \
    --cc=frederic@kernel.org \
    --cc=hdanton@sina.com \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleksiy.avramchenko@sony.com \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.