public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@kernel.org>
To: rcu@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, kernel-team@fb.com,
	rostedt@goodmis.org, Zqiang <qiang1.zhang@intel.com>,
	Ammar Faizi <ammarfaizi2@gnuweeb.org>,
	"Paul E . McKenney" <paulmck@kernel.org>
Subject: [PATCH rcu 10/10] rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
Date: Fri,  4 Feb 2022 15:24:06 -0800	[thread overview]
Message-ID: <20220204232406.814-10-paulmck@kernel.org> (raw)
In-Reply-To: <20220204232355.GA728@paulmck-ThinkPad-P17-Gen-1>

From: Zqiang <qiang1.zhang@intel.com>

When the rcutree.use_softirq kernel boot parameter is set to zero, all
RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
If these kthreads are being starved, quiescent states will not be
reported, which in turn means that the grace period will not end, which
can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
stack traces of stalled CPUs' rcuc kthreads, which can help identify
what is preventing those kthreads from running.

Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        |  3 +++
 kernel/rcu/tree.h        |  1 +
 kernel/rcu/tree_plugin.h |  3 +++
 kernel/rcu/tree_stall.h  | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9e4c5b281f003..bd9b2af247abd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
 {
 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
 	int spincnt;
 
 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
 	for (spincnt = 0; spincnt < 10; spincnt++) {
+		WRITE_ONCE(*j, jiffies);
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
@@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	schedule_timeout_idle(2);
 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
+	WRITE_ONCE(*j, jiffies);
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e9990945483f1..b84cc5742c317 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -239,6 +239,7 @@ struct rcu_data {
 					/* rcuc per-CPU kthread or NULL. */
 	unsigned int rcu_cpu_kthread_status;
 	char rcu_cpu_has_work;
+	unsigned long rcuc_activity;
 
 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 02ac057ba3f83..8167cab1bffc8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
  */
 static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 #ifdef CONFIG_RCU_BOOST
 	struct sched_param sp;
 
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
+
+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
 }
 
 #ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030b..0c5d8516516af 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
 	return j > 2 * HZ;
 }
 
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+	if (jp)
+		*jp = j;
+	return j > 2 * HZ;
+}
+
 /*
  * Print out diagnostic information for the specified stalled CPU.
  *
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
 	       falsepositive ? " (false positive?)" : "");
 }
 
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+	int cpu;
+	unsigned long j;
+	struct task_struct *rcuc;
+
+	rcuc = rdp->rcu_cpu_kthread_task;
+	if (!rcuc)
+		return;
+
+	cpu = task_cpu(rcuc);
+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
+		return;
+
+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+		return;
+
+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+	sched_show_task(rcuc);
+	if (!trigger_single_cpu_backtrace(cpu))
+		dump_cpu_task(cpu);
+}
+
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
+	if (!use_softirq)
+		rcuc_kthread_dump(rdp);
+
 	rcu_dump_cpu_stacks();
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-- 
2.31.1.189.g2e36527f23


      parent reply	other threads:[~2022-02-04 23:24 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-04 23:23 [PATCH rcu 0/10] Real-time-related updates for v5.18 Paul E. McKenney
2022-02-04 23:23 ` [PATCH rcu 01/10] rcu: Mark accesses to boost_starttime Paul E. McKenney
2022-02-04 23:23 ` [PATCH rcu 02/10] rcu/nocb: Handle concurrent nocb kthreads creation Paul E. McKenney
2022-02-04 23:23 ` [PATCH rcu 03/10] rcu: Remove unused rcu_state.boost Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 04/10] rcu: Create per-cpu rcuc kthreads only when rcutree.use_softirq=0 Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 05/10] rcu: Move kthread_prio bounds-check to a separate function Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 06/10] rcu: Make priority of grace-period thread consistent Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 07/10] rcu: Elevate priority of offloaded callback threads Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 08/10] rcu: Update documentation regarding kthread_prio cmdline parameter Paul E. McKenney
2022-02-04 23:24 ` [PATCH rcu 09/10] rcu: Don't deboost before reporting expedited quiescent state Paul E. McKenney
2022-02-04 23:24 ` Paul E. McKenney [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220204232406.814-10-paulmck@kernel.org \
    --to=paulmck@kernel.org \
    --cc=ammarfaizi2@gnuweeb.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=qiang1.zhang@intel.com \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox