* [PATCH v3 0/2] rcu: Deoffload rdp if rcuop/rcuog kthreads spawn failed
@ 2022-03-18 8:07 Zqiang
2022-03-18 8:07 ` [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op " Zqiang
2022-03-18 8:07 ` [PATCH v3 2/2] rcu: Invert the locking dependency order between rcu_state.barrier_mutex and hotplug lock Zqiang
0 siblings, 2 replies; 4+ messages in thread
From: Zqiang @ 2022-03-18 8:07 UTC (permalink / raw)
To: paulmck, frederic; +Cc: rcu, linux-kernel
When CONFIG_RCU_NOCB_CPU is enabled and 'rcu_nocbs' is set, the rcuop
and rcuog kthreads are created. However, creation of the rcuop or rcuog
kthreads may fail; if it does, deoffload the offloaded rdp.
---
v1->v2:
Invert the locking dependency order between rcu_state.barrier_mutex
and hotplug lock.
Hold nocb_gp_kthread_mutex while checking that the nocb_gp_kthread exists.
v2->v3:
Adjust the position of the nocb_gp_kthread_mutex: take
nocb_gp_kthread_mutex above the rdp_gp->nocb_gp_kthread check.
Do the locking order change in a separate patch.
Zqiang(2):
rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op kthreads spawn failed
rcu: Invert the locking dependency order between rcu_state.barrier_mutex and hotplug lock
kernel/rcu/tree_nocb.h | 83 ++++++++++++++++++++++++++++++++----------
1 file changed, 63 insertions(+), 20 deletions(-)
--
2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op kthreads spawn failed
2022-03-18 8:07 [PATCH v3 0/2] rcu: Deoffload rdp if rcuop/rcuog kthreads spawn failed Zqiang
@ 2022-03-18 8:07 ` Zqiang
2022-03-22 12:30 ` Frederic Weisbecker
2022-03-18 8:07 ` [PATCH v3 2/2] rcu: Invert the locking dependency order between rcu_state.barrier_mutex and hotplug lock Zqiang
1 sibling, 1 reply; 4+ messages in thread
From: Zqiang @ 2022-03-18 8:07 UTC (permalink / raw)
To: paulmck, frederic; +Cc: rcu, linux-kernel
If spawning the rcuog/op kthreads fails, the offloaded rdp needs to
be deoffloaded; otherwise the target rdp is considered offloaded
but nothing actually handles its callbacks.
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
kernel/rcu/tree_nocb.h | 75 +++++++++++++++++++++++++++++++++---------
1 file changed, 59 insertions(+), 16 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 46694e13398a..e8265e4ec5ee 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -972,10 +972,7 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
}
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
- if (wake_gp)
- wake_up_process(rdp_gp->nocb_gp_kthread);
-
- return 0;
+ return wake_gp;
}
static long rcu_nocb_rdp_deoffload(void *arg)
@@ -983,9 +980,18 @@ static long rcu_nocb_rdp_deoffload(void *arg)
struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
- int ret;
+ int wake_gp;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+ int condition;
- WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
+ /*
+ * The rcu_nocb_rdp_deoffload() will be called directly when
+ * rcuog/op spawn failed, because at this time the rdp->cpu
+ * is not online(cpu_online(rdp->cpu) return false), the deoffload
+ * operation was not performed on rdp->cpu, to avoid warnings
+ * add cpu_online(rdp->cpu) condition judgment.
+ */
+ WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu));
pr_info("De-offloading %d\n", rdp->cpu);
@@ -1009,10 +1015,33 @@ static long rcu_nocb_rdp_deoffload(void *arg)
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
invoke_rcu_core();
- ret = rdp_offload_toggle(rdp, false, flags);
- swait_event_exclusive(rdp->nocb_state_wq,
- !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
- SEGCBLIST_KTHREAD_GP));
+ wake_gp = rdp_offload_toggle(rdp, false, flags);
+
+ mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
+ if (rdp_gp->nocb_gp_kthread) {
+ if (wake_gp)
+ wake_up_process(rdp_gp->nocb_gp_kthread);
+
+ if (rdp->nocb_cb_kthread) {
+ condition = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP;
+ } else {
+ /*
+ *If rcuop kthread spawn failed, direct remove SEGCBLIST_KTHREAD_CB
+ *just wait SEGCBLIST_KTHREAD_GP to be cleared.
+ */
+ condition = SEGCBLIST_KTHREAD_GP;
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
+ }
+ swait_event_exclusive(rdp->nocb_state_wq,
+ !rcu_segcblist_test_flags(cblist, condition));
+ } else {
+ rcu_nocb_lock_irqsave(rdp, flags);
+ rcu_segcblist_clear_flags(&rdp->cblist,
+ SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ }
+ mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
+
/* Stop nocb_gp_wait() from iterating over this structure. */
list_del_rcu(&rdp->nocb_entry_rdp);
/*
@@ -1035,7 +1064,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
- return ret;
+ return 0;
}
int rcu_nocb_cpu_deoffload(int cpu)
@@ -1067,7 +1096,8 @@ static long rcu_nocb_rdp_offload(void *arg)
struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
- int ret;
+ int wake_gp;
+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
/*
@@ -1077,6 +1107,9 @@ static long rcu_nocb_rdp_offload(void *arg)
if (!rdp->nocb_gp_rdp)
return -EINVAL;
+ if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
+ return -EINVAL;
+
pr_info("Offloading %d\n", rdp->cpu);
/*
@@ -1111,7 +1144,9 @@ static long rcu_nocb_rdp_offload(void *arg)
* WRITE flags READ callbacks
* rcu_nocb_unlock() rcu_nocb_unlock()
*/
- ret = rdp_offload_toggle(rdp, true, flags);
+ wake_gp = rdp_offload_toggle(rdp, true, flags);
+ if (wake_gp)
+ wake_up_process(rdp_gp->nocb_gp_kthread);
swait_event_exclusive(rdp->nocb_state_wq,
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
@@ -1124,7 +1159,7 @@ static long rcu_nocb_rdp_offload(void *arg)
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
rcu_nocb_unlock_irqrestore(rdp, flags);
- return ret;
+ return 0;
}
int rcu_nocb_cpu_offload(int cpu)
@@ -1246,7 +1281,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
"rcuog/%d", rdp_gp->cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
- return;
+ goto end;
}
WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
if (kthread_prio)
@@ -1258,12 +1293,20 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
t = kthread_run(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
- return;
+ goto end;
if (kthread_prio)
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
WRITE_ONCE(rdp->nocb_cb_kthread, t);
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
+ return;
+end:
+ mutex_lock(&rcu_state.barrier_mutex);
+ if (rcu_rdp_is_offloaded(rdp)) {
+ rcu_nocb_rdp_deoffload(rdp);
+ cpumask_clear_cpu(cpu, rcu_nocb_mask);
+ }
+ mutex_unlock(&rcu_state.barrier_mutex);
}
/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v3 2/2] rcu: Invert the locking dependency order between rcu_state.barrier_mutex and hotplug lock
2022-03-18 8:07 [PATCH v3 0/2] rcu: Deoffload rdp if rcuop/rcuog kthreads spawn failed Zqiang
2022-03-18 8:07 ` [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op " Zqiang
@ 2022-03-18 8:07 ` Zqiang
1 sibling, 0 replies; 4+ messages in thread
From: Zqiang @ 2022-03-18 8:07 UTC (permalink / raw)
To: paulmck, frederic; +Cc: rcu, linux-kernel
When rcutree_prepare_cpu() is called, the cpus write lock is already held,
and the failure path then locks the barrier_mutex before calling
rcu_nocb_rdp_deoffload(). Therefore, invert the locking dependency order.
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
kernel/rcu/tree_nocb.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index e8265e4ec5ee..fe0063a23fbb 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1072,8 +1072,8 @@ int rcu_nocb_cpu_deoffload(int cpu)
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int ret = 0;
- mutex_lock(&rcu_state.barrier_mutex);
cpus_read_lock();
+ mutex_lock(&rcu_state.barrier_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
if (cpu_online(cpu)) {
ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
@@ -1084,8 +1084,8 @@ int rcu_nocb_cpu_deoffload(int cpu)
ret = -EINVAL;
}
}
- cpus_read_unlock();
mutex_unlock(&rcu_state.barrier_mutex);
+ cpus_read_unlock();
return ret;
}
@@ -1167,8 +1167,8 @@ int rcu_nocb_cpu_offload(int cpu)
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int ret = 0;
- mutex_lock(&rcu_state.barrier_mutex);
cpus_read_lock();
+ mutex_lock(&rcu_state.barrier_mutex);
if (!rcu_rdp_is_offloaded(rdp)) {
if (cpu_online(cpu)) {
ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
@@ -1179,8 +1179,8 @@ int rcu_nocb_cpu_offload(int cpu)
ret = -EINVAL;
}
}
- cpus_read_unlock();
mutex_unlock(&rcu_state.barrier_mutex);
+ cpus_read_unlock();
return ret;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op kthreads spawn failed
2022-03-18 8:07 ` [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op " Zqiang
@ 2022-03-22 12:30 ` Frederic Weisbecker
0 siblings, 0 replies; 4+ messages in thread
From: Frederic Weisbecker @ 2022-03-22 12:30 UTC (permalink / raw)
To: Zqiang; +Cc: paulmck, rcu, linux-kernel
On Fri, Mar 18, 2022 at 04:07:18PM +0800, Zqiang wrote:
> @@ -1009,10 +1015,33 @@ static long rcu_nocb_rdp_deoffload(void *arg)
> */
> rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
> invoke_rcu_core();
> - ret = rdp_offload_toggle(rdp, false, flags);
> - swait_event_exclusive(rdp->nocb_state_wq,
> - !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
> - SEGCBLIST_KTHREAD_GP));
> + wake_gp = rdp_offload_toggle(rdp, false, flags);
> +
> + mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
> + if (rdp_gp->nocb_gp_kthread) {
> + if (wake_gp)
> + wake_up_process(rdp_gp->nocb_gp_kthread);
> +
> + if (rdp->nocb_cb_kthread) {
> + condition = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP;
> + } else {
> + /*
> + *If rcuop kthread spawn failed, direct remove SEGCBLIST_KTHREAD_CB
> + *just wait SEGCBLIST_KTHREAD_GP to be cleared.
> + */
> + condition = SEGCBLIST_KTHREAD_GP;
> + rcu_segcblist_clear_flags(&rdp->cblist,
> SEGCBLIST_KTHREAD_CB);
You may be running concurrently against nocb_gp_wait() ->
nocb_gp_update_state_deoffloading() -> rcu_segcblist_clear_flags(cblist,
SEGCBLIST_KTHREAD_GP)
So you need to protect the flags clear with rcu_nocb lock.
> + }
> + swait_event_exclusive(rdp->nocb_state_wq,
> + !rcu_segcblist_test_flags(cblist, condition));
> + } else {
> + rcu_nocb_lock_irqsave(rdp, flags);
> + rcu_segcblist_clear_flags(&rdp->cblist,
> + SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
> + rcu_nocb_unlock_irqrestore(rdp, flags);
Like you're doing here.
Thanks!
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-03-22 12:30 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-18 8:07 [PATCH v3 0/2] rcu: Deoffload rdp if rcuop/rcuog kthreads spawn failed Zqiang
2022-03-18 8:07 ` [PATCH v3 1/2] rcu: Call rcu_nocb_rdp_deoffload() directly after rcuog/op " Zqiang
2022-03-22 12:30 ` Frederic Weisbecker
2022-03-18 8:07 ` [PATCH v3 2/2] rcu: Invert the locking dependency order between rcu_state.barrier_mutex and hotplug lock Zqiang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.