public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Changwoo Min <changwoo@igalia.com>
To: tj@kernel.org, void@manifault.com, arighi@nvidia.com,
	changwoo@igalia.com
Cc: kernel-dev@igalia.com, sched-ext@lists.linux.dev,
	linux-kernel@vger.kernel.org
Subject: [PATCH v3 2/3] sched_ext: Dump the exit CPU first
Date: Wed, 29 Apr 2026 17:23:17 +0900	[thread overview]
Message-ID: <20260429082318.420146-3-changwoo@igalia.com> (raw)
In-Reply-To: <20260429082318.420146-1-changwoo@igalia.com>

When sched_ext is disabled by an error, the CPU that triggered the exit
is the most relevant piece of information for diagnosing the problem.
However, if there are many CPUs, the dump can get truncated and that
CPU's information may not appear in the output.

Add an exit_cpu field to scx_exit_info and thread it through scx_vexit()
/ __scx_exit(). For the watchdog stall path, populate it from cpu_of(rq)
in check_rq_for_timeouts(). For all other exit paths, define a scx_exit()
macro that wraps __scx_exit() with raw_smp_processor_id(), and have
scx_verror() pass the same to scx_vexit(), so the CPU that initiated the
exit is captured automatically, with no call-site changes needed.

In scx_dump_state(), report the exit CPU in the dump header ("on cpu N")
and dump that CPU first, skipping it in the per-CPU loop, so the most
relevant CPU is never truncated out of the dump. alloc_exit_info() and
the SysRq-D path initialize exit_cpu to -1 so that dumps with no recorded
exit CPU, such as debug dumps not tied to an exit, don't arbitrarily
promote CPU 0.

Signed-off-by: Changwoo Min <changwoo@igalia.com>
---
 kernel/sched/ext.c          | 52 +++++++++++++++++++++++++++----------
 kernel/sched/ext_internal.h |  6 +++++
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 025bd8c6f429..46c2e395de03 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -234,24 +234,29 @@ static bool task_dead_and_done(struct task_struct *p);
 static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
 static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind);
 static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
-		      s64 exit_code, const char *fmt, va_list args);
+		      s64 exit_code, s32 exit_cpu, const char *fmt,
+		      va_list args);
 
-static __printf(4, 5) bool scx_exit(struct scx_sched *sch,
-				    enum scx_exit_kind kind, s64 exit_code,
-				    const char *fmt, ...)
+static __printf(5, 6) bool __scx_exit(struct scx_sched *sch,
+				      enum scx_exit_kind kind, s64 exit_code,
+				      s32 exit_cpu, const char *fmt, ...)
 {
 	va_list args;
 	bool ret;
 
 	va_start(args, fmt);
-	ret = scx_vexit(sch, kind, exit_code, fmt, args);
+	ret = scx_vexit(sch, kind, exit_code, exit_cpu, fmt, args);
 	va_end(args);
 
 	return ret;
 }
 
+#define scx_exit(sch, kind, exit_code, fmt, args...)				\
+	__scx_exit(sch, kind, exit_code, raw_smp_processor_id(), fmt, ##args)
+
 #define scx_error(sch, fmt, args...)	scx_exit((sch), SCX_EXIT_ERROR, 0, fmt, ##args)
-#define scx_verror(sch, fmt, args)	scx_vexit((sch), SCX_EXIT_ERROR, 0, fmt, args)
+#define scx_verror(sch, fmt, args)						\
+	scx_vexit((sch), SCX_EXIT_ERROR, 0, raw_smp_processor_id(), fmt, args)
 
 #define SCX_HAS_OP(sch, op)	test_bit(SCX_OP_IDX(op), (sch)->has_op)
 
@@ -3389,9 +3394,10 @@ static bool check_rq_for_timeouts(struct rq *rq)
 					last_runnable + READ_ONCE(sch->watchdog_timeout)))) {
 			u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
 
-			scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
-				 "%s[%d] failed to run for %u.%03us",
-				 p->comm, p->pid, dur_ms / 1000, dur_ms % 1000);
+			__scx_exit(sch, SCX_EXIT_ERROR_STALL, 0, cpu_of(rq),
+				   "%s[%d] failed to run for %u.%03us",
+				   p->comm, p->pid, dur_ms / 1000,
+				   dur_ms % 1000);
 			timed_out = true;
 			break;
 		}
@@ -5528,6 +5534,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len)
 	if (!ei)
 		return NULL;
 
+	ei->exit_cpu = -1;
 	ei->bt = kzalloc_objs(ei->bt[0], SCX_EXIT_BT_LEN);
 	ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
 	ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL);
@@ -6384,8 +6391,13 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
 	if (ei->kind == SCX_EXIT_NONE) {
 		dump_line(&s, "Debug dump triggered by %s", ei->reason);
 	} else {
-		dump_line(&s, "%s[%d] triggered exit kind %d:",
-			  current->comm, current->pid, ei->kind);
+		if (ei->exit_cpu >= 0)
+			dump_line(&s, "%s[%d] triggered exit kind %d on cpu %d:",
+				  current->comm, current->pid, ei->kind,
+				  ei->exit_cpu);
+		else
+			dump_line(&s, "%s[%d] triggered exit kind %d:",
+				  current->comm, current->pid, ei->kind);
 		dump_line(&s, "  %s (%s)", ei->reason, ei->msg);
 		dump_newline(&s);
 		dump_line(&s, "Backtrace:");
@@ -6402,8 +6414,15 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
 	dump_line(&s, "CPU states");
 	dump_line(&s, "----------");
 
+	/*
+	 * Dump the exit CPU first so it isn't lost to dump truncation, then
+	 * walk the rest in order, skipping the one already dumped.
+	 */
+	if (ei->exit_cpu >= 0)
+		scx_dump_cpu(sch, &s, &dctx, ei->exit_cpu, dump_all_tasks);
 	for_each_possible_cpu(cpu) {
-		scx_dump_cpu(sch, &s, &dctx, cpu, dump_all_tasks);
+		if (cpu != ei->exit_cpu)
+			scx_dump_cpu(sch, &s, &dctx, cpu, dump_all_tasks);
 	}
 
 	dump_newline(&s);
@@ -6442,7 +6461,7 @@ static void scx_disable_irq_workfn(struct irq_work *irq_work)
 }
 
 static bool scx_vexit(struct scx_sched *sch,
-		      enum scx_exit_kind kind, s64 exit_code,
+		      enum scx_exit_kind kind, s64 exit_code, s32 exit_cpu,
 		      const char *fmt, va_list args)
 {
 	struct scx_exit_info *ei = sch->exit_info;
@@ -6465,6 +6484,7 @@ static bool scx_vexit(struct scx_sched *sch,
 	 */
 	ei->kind = kind;
 	ei->reason = scx_exit_reason(ei->kind);
+	ei->exit_cpu = exit_cpu;
 
 	irq_work_queue(&sch->disable_irq_work);
 	return true;
@@ -7730,7 +7750,11 @@ static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
 
 static void sysrq_handle_sched_ext_dump(u8 key)
 {
-	struct scx_exit_info ei = { .kind = SCX_EXIT_NONE, .reason = "SysRq-D" };
+	struct scx_exit_info ei = {
+		.kind		= SCX_EXIT_NONE,
+		.exit_cpu	= -1,
+		.reason		= "SysRq-D",
+	};
 	struct scx_sched *sch;
 
 	list_for_each_entry_rcu(sch, &scx_sched_all, all)
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index a54903bb74b3..54c6ed43b6c7 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -97,6 +97,12 @@ struct scx_exit_info {
 	/* %SCX_EXIT_* - broad category of the exit reason */
 	enum scx_exit_kind	kind;
 
+	/*
+	 * CPU tied to the exit (the caller's, or the stalled rq's), valid once
+	 * @kind has been set. Negative if the exit path didn't identify a CPU.
+	 */
+	s32			exit_cpu;
+
 	/* exit code if gracefully exiting */
 	s64			exit_code;
 
-- 
2.54.0


  parent reply	other threads:[~2026-04-29  8:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-29  8:23 [PATCH v3 0/3] sched_ext: Improve exit-time diagnostics Changwoo Min
2026-04-29  8:23 ` [PATCH v3 1/3] sched_ext: Extract scx_dump_cpu() from scx_dump_state() Changwoo Min
2026-04-29  8:23 ` Changwoo Min [this message]
2026-04-29  8:23 ` [PATCH v3 3/3] sched_ext: Expose exit_cpu to BPF and userspace Changwoo Min
2026-04-29  8:57 ` [PATCH v3 0/3] sched_ext: Improve exit-time diagnostics Tejun Heo
2026-04-29 11:29   ` Cheng-Yang Chou
2026-04-29 12:51     ` Changwoo Min
2026-04-29 15:16     ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260429082318.420146-3-changwoo@igalia.com \
    --to=changwoo@igalia.com \
    --cc=arighi@nvidia.com \
    --cc=kernel-dev@igalia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sched-ext@lists.linux.dev \
    --cc=tj@kernel.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox