Linux RCU subsystem development
 help / color / mirror / Atom feed
From: neeraj.upadhyay@kernel.org
To: rcu@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, kernel-team@meta.com,
	rostedt@goodmis.org, paulmck@kernel.org,
	neeraj.upadhyay@kernel.org, neeraj.upadhyay@amd.com,
	boqun.feng@gmail.com, joel@joelfernandes.org, urezki@gmail.com,
	frederic@kernel.org
Subject: [PATCH rcu 04/14] rcuscale: Dump stacks of stalled rcu_scale_writer() instances
Date: Fri, 16 Aug 2024 12:32:46 +0530	[thread overview]
Message-ID: <20240816070256.60993-4-neeraj.upadhyay@kernel.org> (raw)
In-Reply-To: <20240816070209.GA60666@neeraj.linux>

From: "Paul E. McKenney" <paulmck@kernel.org>

This commit improves debuggability by dumping the stacks of
rcu_scale_writer() instances that have not completed in a reasonable
timeframe.  These stacks are dumped remotely, that is, from a task other
than the one being dumped, so they might be inaccurate for an instance
that is currently running.  However, they will be accurate in the
thus-far common case where the stalled rcu_scale_writer() instances
are blocked.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: "Paul E. McKenney" <paulmck@kernel.org>
Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
---
 kernel/rcu/rcuscale.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 3269dd9c639f..5087ca7062d9 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -39,6 +39,7 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/sched/debug.h>
 
 #include "rcu.h"
 
@@ -111,6 +112,7 @@ static struct task_struct **reader_tasks;
 static struct task_struct *shutdown_task;
 
 static u64 **writer_durations;
+static bool *writer_done;
 static int *writer_n_durations;
 static atomic_t n_rcu_scale_reader_started;
 static atomic_t n_rcu_scale_writer_started;
@@ -524,6 +526,7 @@ rcu_scale_writer(void *arg)
 			started = true;
 		if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
 			done = true;
+			WRITE_ONCE(writer_done[me], true);
 			sched_set_normal(current, 0);
 			pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
 				 scale_type, SCALE_FLAG, me, MIN_MEAS);
@@ -549,6 +552,19 @@ rcu_scale_writer(void *arg)
 		if (done && !alldone &&
 		    atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
 			alldone = true;
+		if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) {
+			static atomic_t dumped;
+			int i;
+
+			if (!atomic_xchg(&dumped, 1)) {
+				for (i = 0; i < nrealwriters; i++) {
+					if (writer_done[i])
+						continue;
+					pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
+					sched_show_task(writer_tasks[i]);
+				}
+			}
+		}
 		if (started && !alldone && i < MAX_MEAS - 1)
 			i++;
 		rcu_scale_wait_shutdown();
@@ -921,6 +937,8 @@ rcu_scale_cleanup(void)
 		kfree(writer_tasks);
 		kfree(writer_durations);
 		kfree(writer_n_durations);
+		kfree(writer_done);
+		writer_done = NULL;
 	}
 
 	/* Do torture-type-specific cleanup operations.  */
@@ -1015,10 +1033,11 @@ rcu_scale_init(void)
 	}
 	while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
 		schedule_timeout_uninterruptible(1);
-	writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]), GFP_KERNEL);
+	writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]), GFP_KERNEL);
 	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
 	writer_n_durations = kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL);
-	if (!writer_tasks || !writer_durations || !writer_n_durations) {
+	writer_done = kcalloc(nrealwriters, sizeof(writer_done[0]), GFP_KERNEL);
+	if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done) {
 		SCALEOUT_ERRSTRING("out of memory");
 		firsterr = -ENOMEM;
 		goto unwind;
-- 
2.40.1


  parent reply	other threads:[~2024-08-16  7:04 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-16  7:02 [PATCH rcu 00/14] RCU scaling tests updates for v6.12 Neeraj Upadhyay
2024-08-16  7:02 ` [PATCH rcu 01/14] refscale: Add TINY scenario neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 02/14] refscale: Optimize process_durations() neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 03/14] rcuscale: Save a few lines with whitespace-only change neeraj.upadhyay
2024-08-16  7:02 ` neeraj.upadhyay [this message]
2024-08-16  7:02 ` [PATCH rcu 05/14] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 06/14] rcu: Mark callbacks not currently participating in barrier operation neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 07/14] rcuscale: Print detailed grace-period and barrier diagnostics neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 08/14] rcuscale: Provide clear error when async specified without primitives neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 09/14] rcuscale: Make all writer tasks report upon hang neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 10/14] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 11/14] rcuscale: Use special allocator for rcu_scale_writer() neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 12/14] rcuscale: NULL out top-level pointers to heap memory neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 13/14] rcuscale: Count outstanding callbacks per-task rather than per-CPU neeraj.upadhyay
2024-08-16  7:02 ` [PATCH rcu 14/14] refscale: Constify struct ref_scale_ops neeraj.upadhyay

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240816070256.60993-4-neeraj.upadhyay@kernel.org \
    --to=neeraj.upadhyay@kernel.org \
    --cc=boqun.feng@gmail.com \
    --cc=frederic@kernel.org \
    --cc=joel@joelfernandes.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neeraj.upadhyay@amd.com \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=urezki@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.