From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
Boqun Feng <boqun.feng@gmail.com>,
Joel Fernandes <joel@joelfernandes.org>,
Neeraj Upadhyay <neeraj.upadhyay@amd.com>,
"Paul E . McKenney" <paulmck@kernel.org>,
Uladzislau Rezki <urezki@gmail.com>,
Zqiang <qiang.zhang1211@gmail.com>, rcu <rcu@vger.kernel.org>,
Anna-Maria Behnsen <anna-maria.behnsen@linutronix.de>,
Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 6/8] rcu/exp: Make parallel exp gp kworker per rcu node
Date: Fri, 8 Dec 2023 23:05:43 +0100 [thread overview]
Message-ID: <20231208220545.7452-7-frederic@kernel.org> (raw)
In-Reply-To: <20231208220545.7452-1-frederic@kernel.org>
When CONFIG_RCU_EXP_KTHREAD=n, the expedited grace period per node
initialization is performed in parallel via workqueues (one work per
node).
However, when CONFIG_RCU_EXP_KTHREAD=y, this per node initialization is
performed by a single kworker that serializes each node's initialization
(one work for all nodes).
The second approach is certainly less scalable and less efficient beyond a
single leaf node.
To improve this, expand this single kworker into per-node kworkers. This
new layout is intended to eventually allow removal of the workqueue-based
implementation, since that will now essentially become duplicate code.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
kernel/rcu/rcu.h | 1 -
kernel/rcu/tree.c | 123 +++++++++++++++++++++++----------------
kernel/rcu/tree.h | 1 +
kernel/rcu/tree_exp.h | 10 ++--
kernel/rcu/tree_plugin.h | 10 +---
5 files changed, 81 insertions(+), 64 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f94f65877f2b..6beaf70d629f 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -625,7 +625,6 @@ void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker;
-extern struct kthread_worker *rcu_exp_par_gp_kworker;
#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* CONFIG_RCU_EXP_KTHREAD */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 996efaded5bf..060d418c2b44 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4367,6 +4367,75 @@ rcu_boot_init_percpu_data(int cpu)
rcu_boot_init_nocb_percpu_data(rdp);
}
+#ifdef CONFIG_RCU_EXP_KTHREAD
+struct kthread_worker *rcu_exp_gp_kworker;
+
+static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
+{
+ struct kthread_worker *kworker;
+ const char *name = "rcu_exp_par_gp_kthread_worker/%d";
+ struct sched_param param = { .sched_priority = kthread_prio };
+ int rnp_index = rnp - rcu_get_root();
+
+ if (rnp->exp_kworker)
+ return;
+
+ kworker = kthread_create_worker(0, name, rnp_index);
+ if (IS_ERR_OR_NULL(kworker)) {
+ pr_err("Failed to create par gp kworker on %d/%d\n",
+ rnp->grplo, rnp->grphi);
+ return;
+ }
+ WRITE_ONCE(rnp->exp_kworker, kworker);
+ sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
+}
+
+static void __init rcu_start_exp_gp_kworker(void)
+{
+ const char *name = "rcu_exp_gp_kthread_worker";
+ struct sched_param param = { .sched_priority = kthread_prio };
+
+ rcu_exp_gp_kworker = kthread_create_worker(0, name);
+ if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+ pr_err("Failed to create %s!\n", name);
+ rcu_exp_gp_kworker = NULL;
+ return;
+ }
+ sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+struct workqueue_struct *rcu_par_gp_wq;
+
+static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
+{
+}
+
+static void __init rcu_start_exp_gp_kworker(void)
+{
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+ rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+ WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
+static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp)
+{
+ if ((IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) ||
+ IS_ENABLED(CONFIG_RCU_BOOST)) && rcu_scheduler_fully_active) {
+ mutex_lock(&rnp->kthread_mutex);
+ rcu_spawn_one_boost_kthread(rnp);
+ rcu_spawn_exp_par_gp_kworker(rnp);
+ mutex_unlock(&rnp->kthread_mutex);
+ }
+}
+
/*
* Invoked early in the CPU-online process, when pretty much all services
* are available. The incoming CPU is not present.
@@ -4415,7 +4484,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- rcu_spawn_one_boost_kthread(rnp);
+ rcu_spawn_rnp_kthreads(rnp);
rcu_spawn_cpu_nocb_kthread(cpu);
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
@@ -4704,54 +4773,6 @@ static int rcu_pm_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-#ifdef CONFIG_RCU_EXP_KTHREAD
-struct kthread_worker *rcu_exp_gp_kworker;
-struct kthread_worker *rcu_exp_par_gp_kworker;
-
-static void __init rcu_start_exp_gp_kworkers(void)
-{
- const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
- const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
- struct sched_param param = { .sched_priority = kthread_prio };
-
- rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
- if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
- pr_err("Failed to create %s!\n", gp_kworker_name);
- rcu_exp_gp_kworker = NULL;
- return;
- }
-
- rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
- if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
- pr_err("Failed to create %s!\n", par_gp_kworker_name);
- rcu_exp_par_gp_kworker = NULL;
- kthread_destroy_worker(rcu_exp_gp_kworker);
- rcu_exp_gp_kworker = NULL;
- return;
- }
-
- sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
- sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
- &param);
-}
-
-static inline void rcu_alloc_par_gp_wq(void)
-{
-}
-#else /* !CONFIG_RCU_EXP_KTHREAD */
-struct workqueue_struct *rcu_par_gp_wq;
-
-static void __init rcu_start_exp_gp_kworkers(void)
-{
-}
-
-static inline void rcu_alloc_par_gp_wq(void)
-{
- rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
- WARN_ON(!rcu_par_gp_wq);
-}
-#endif /* CONFIG_RCU_EXP_KTHREAD */
-
/*
* Spawn the kthreads that handle RCU's grace periods.
*/
@@ -4786,10 +4807,10 @@ static int __init rcu_spawn_gp_kthread(void)
* due to rcu_scheduler_fully_active.
*/
rcu_spawn_cpu_nocb_kthread(smp_processor_id());
- rcu_spawn_one_boost_kthread(rdp->mynode);
+ rcu_spawn_rnp_kthreads(rdp->mynode);
rcu_spawn_core_kthreads();
/* Create kthread worker for expedited GPs */
- rcu_start_exp_gp_kworkers();
+ rcu_start_exp_gp_kworker();
return 0;
}
early_initcall(rcu_spawn_gp_kthread);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f98a245e5f32..ef3d3385063f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -72,6 +72,7 @@ struct rcu_node {
/* Online CPUs for next expedited GP. */
/* Any CPU that has ever been online will */
/* have its bit set. */
+ struct kthread_worker *exp_kworker;
unsigned long cbovldmask;
/* CPUs experiencing callback overload. */
unsigned long ffmask; /* Fully functional CPUs. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index cb31f4fb4b36..744d6acf5553 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -434,9 +434,9 @@ static inline bool rcu_exp_worker_started(void)
return !!READ_ONCE(rcu_exp_gp_kworker);
}
-static inline bool rcu_exp_par_worker_started(void)
+static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{
- return !!READ_ONCE(rcu_exp_par_gp_kworker);
+ return !!READ_ONCE(rnp->exp_kworker);
}
static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
@@ -447,7 +447,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
* another work item on the same kthread worker can result in
* deadlock.
*/
- kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+ kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
}
static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
@@ -489,7 +489,7 @@ static inline bool rcu_exp_worker_started(void)
return !!READ_ONCE(rcu_gp_wq);
}
-static inline bool rcu_exp_par_worker_started(void)
+static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{
return !!READ_ONCE(rcu_par_gp_wq);
}
@@ -552,7 +552,7 @@ static void sync_rcu_exp_select_cpus(void)
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
- if (!rcu_exp_par_worker_started() ||
+ if (!rcu_exp_par_worker_started(rnp) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
rcu_is_last_leaf_node(rnp)) {
/* No worker started yet or last leaf, do direct call. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d307674915c..09bdd36ca9ff 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
struct sched_param sp;
struct task_struct *t;
- mutex_lock(&rnp->kthread_mutex);
- if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
- goto out;
+ if (rnp->boost_kthread_task)
+ return;
t = kthread_create(rcu_boost_kthread, (void *)rnp,
"rcub/%d", rnp_index);
if (WARN_ON_ONCE(IS_ERR(t)))
- goto out;
+ return;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
@@ -1210,9 +1209,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
-
- out:
- mutex_unlock(&rnp->kthread_mutex);
}
/*
--
2.42.1
next prev parent reply other threads:[~2023-12-08 22:06 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-08 22:05 [PATCH 0/8] rcu: Fix expedited GP deadlock (and cleanup some nocb stuff) Frederic Weisbecker
2023-12-08 22:05 ` [PATCH 1/8] rcu/nocb: Make IRQs disablement symetric Frederic Weisbecker
2023-12-13 13:01 ` Neeraj upadhyay
2023-12-08 22:05 ` [PATCH 2/8] rcu/nocb: Re-arrange call_rcu() NOCB specific code Frederic Weisbecker
2023-12-08 22:05 ` [PATCH 3/8] rcu/exp: Fix RCU expedited parallel grace period kworker allocation failure recovery Frederic Weisbecker
2023-12-12 16:04 ` Kalesh Singh
2023-12-12 16:10 ` Kalesh Singh
2023-12-08 22:05 ` [PATCH 4/8] rcu/exp: Handle RCU expedited grace period kworker allocation failure Frederic Weisbecker
2023-12-12 16:14 ` Kalesh Singh
2023-12-08 22:05 ` [PATCH 5/8] rcu: s/boost_kthread_mutex/kthread_mutex Frederic Weisbecker
2023-12-08 22:05 ` Frederic Weisbecker [this message]
2023-12-08 22:05 ` [PATCH 7/8] rcu/exp: Handle parallel exp gp kworkers affinity Frederic Weisbecker
2023-12-08 22:05 ` [PATCH 8/8] rcu/exp: Remove rcu_par_gp_wq Frederic Weisbecker
2023-12-11 16:38 ` [PATCH 0/8] rcu: Fix expedited GP deadlock (and cleanup some nocb stuff) Paul E. McKenney
2023-12-11 20:04 ` Frederic Weisbecker
2023-12-11 21:39 ` Paul E. McKenney
2023-12-12 13:34 ` Frederic Weisbecker
-- strict thread matches above, loose matches on Subject: below --
2023-12-19 14:08 [PATCH 0/8 v2] " Frederic Weisbecker
2023-12-19 14:08 ` [PATCH 6/8] rcu/exp: Make parallel exp gp kworker per rcu node Frederic Weisbecker
2024-01-29 23:23 [PATCH 0/8] RCU exp updates for v6.9 Boqun Feng
2024-01-29 23:23 ` [PATCH 6/8] rcu/exp: Make parallel exp gp kworker per rcu node Boqun Feng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231208220545.7452-7-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=anna-maria.behnsen@linutronix.de \
--cc=boqun.feng@gmail.com \
--cc=joel@joelfernandes.org \
--cc=linux-kernel@vger.kernel.org \
--cc=neeraj.upadhyay@amd.com \
--cc=paulmck@kernel.org \
--cc=qiang.zhang1211@gmail.com \
--cc=rcu@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=urezki@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox