All of lore.kernel.org
 help / color / mirror / Atom feed
From: Petr Mladek <pmladek@suse.com>
To: Feng Tang <feng.tang@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Lance Yang <ioworker0@gmail.com>,
	paulmck@kernel.org, Steven Rostedt <rostedt@goodmis.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/3] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Date: Mon, 10 Nov 2025 18:55:57 +0100	[thread overview]
Message-ID: <aRInLdgKCzaVeyG0@pathway.suse.cz> (raw)
In-Reply-To: <20251106023032.25875-3-feng.tang@linux.alibaba.com>

On Thu 2025-11-06 10:30:31, Feng Tang wrote:
> When task-hung happens, developers may need different kinds of system
> information (call-stacks, memory info, locks, etc.) to help debugging.
> 
> Add a 'hung_task_sys_info' sysctl knob to take a human-readable string like
> "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
> requested information will be dumped. (refer to kernel/sys_info.c for more
> details).
> 
> Meanwhile, the newly introduced sys_info() call is used to unify some
> existing info-dumping knobs.
> 
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -60,12 +61,23 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
>  static int __read_mostly sysctl_hung_task_warnings = 10;
>  
>  static int __read_mostly did_panic;
> -static bool hung_task_show_lock;
>  static bool hung_task_call_panic;
> -static bool hung_task_show_all_bt;
>  
>  static struct task_struct *watchdog_task;
>  
> +/*
> + * A bitmask to control what kinds of system info to be printed when
> + * a hung task is detected, it could be task, memory, lock etc. Refer
> + * include/linux/sys_info.h for detailed bit definition.
> + */
> +static unsigned long hung_task_si_mask;
> +
> +/*
> + * There are several sysctl knobs, and this serves as the runtime
> + * effective sys_info knob
> + */
> +static unsigned long cur_si_mask;

It seems that this variable is used to pass information between
check_hung_task() and check_hung_uninterruptible_tasks().

The "hung_task_show_lock" and "hung_task_show_all_bt" variables, which
this patch removes, had the same purpose.

If I get it correctly, we could move these decisions to
check_hung_uninterruptible_tasks() and avoid the global
variable.

I think that it even makes the code a bit cleaner.

Something like this on top of this patch:

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 5f0275b2c742..c2a0dfce1e56 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -71,12 +71,6 @@ static struct task_struct *watchdog_task;
  */
 static unsigned long hung_task_si_mask;
 
-/*
- * There are several sysctl knobs, and this serves as the runtime
- * effective sys_info knob
- */
-static unsigned long cur_si_mask;
-
 #ifdef CONFIG_SMP
 /*
  * Should we dump all CPUs backtraces in a hung task event?
@@ -229,11 +223,8 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
 }
 #endif
 
-static void check_hung_task(struct task_struct *t, unsigned long timeout,
-		unsigned long prev_detect_count)
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
-	unsigned long total_hung_task;
-
 	if (!task_is_hung(t, timeout))
 		return;
 
@@ -243,16 +234,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 	 */
 	sysctl_hung_task_detect_count++;
 
-	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
 	trace_sched_process_hang(t);
 
-	cur_si_mask = hung_task_si_mask;
-	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
-		console_verbose();
-		cur_si_mask |= SYS_INFO_LOCKS;
-		hung_task_call_panic = true;
-	}
-
 	/*
 	 * Ok, the task did not get scheduled for more than 2 minutes,
 	 * complain:
@@ -272,10 +255,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 			" disables this message.\n");
 		sched_show_task(t);
 		debug_show_blocker(t, timeout);
-		cur_si_mask |= SYS_INFO_LOCKS;
 
-		if (sysctl_hung_task_all_cpu_backtrace)
-			cur_si_mask |= SYS_INFO_ALL_BT;
 		if (!sysctl_hung_task_warnings)
 			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
 	}
@@ -315,8 +295,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 {
 	int max_count = sysctl_hung_task_check_count;
 	unsigned long last_break = jiffies;
+	unsigned long total_hung_task;
 	struct task_struct *g, *t;
 	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
+	unsigned long si_mask;
 
 	/*
 	 * If the system crashed already then all bets are off,
@@ -325,6 +307,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	if (test_taint(TAINT_DIE) || did_panic)
 		return;
 
+	si_mask = hung_task_si_mask;
+	if (sysctl_hung_task_warnings || hung_task_call_panic) {
+		si_mask |= SYS_INFO_LOCKS;
+
+		if (sysctl_hung_task_all_cpu_backtrace)
+			si_mask |= SYS_INFO_ALL_BT;
+	}
+
 	rcu_read_lock();
 	for_each_process_thread(g, t) {
 
@@ -336,16 +326,20 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			last_break = jiffies;
 		}
 
-		check_hung_task(t, timeout, prev_detect_count);
+		check_hung_task(t, timeout);
 	}
  unlock:
 	rcu_read_unlock();
 
-	if (unlikely(cur_si_mask)) {
-		sys_info(cur_si_mask);
-		cur_si_mask = 0;
+	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
+	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
+		console_verbose();
+		hung_task_call_panic = true;
 	}
 
+	if (unlikely(si_mask))
+		sys_info(si_mask);
+
 	if (hung_task_call_panic)
 		panic("hung_task: blocked tasks");
 }

What do you think?

Hmm, maybe we still need to pass "prev_detect_count" and keep
"console_verbose()" in check_hung_task(), so that the console is
presumably already verbose when the hung task report gets printed
inside the loop.

Best Regards,
Petr

  parent reply	other threads:[~2025-11-10 17:56 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-06  2:30 [PATCH 0/3] Enable hung_task and lockup cases to dump system info on demand Feng Tang
2025-11-06  2:30 ` [PATCH 1/3] docs: panic: correct some sys_info names in sysctl doc Feng Tang
2025-11-10 16:52   ` Petr Mladek
2025-11-11 14:09     ` Feng Tang
2025-11-06  2:30 ` [PATCH 2/3] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung Feng Tang
2025-11-06  3:28   ` Lance Yang
2025-11-06  4:48     ` Feng Tang
2025-11-10 17:55   ` Petr Mladek [this message]
2025-11-11 13:37     ` Feng Tang
2025-11-12 11:25     ` Feng Tang
2025-11-12 14:44       ` Petr Mladek
2025-11-13  2:56         ` Feng Tang
2025-11-06  2:30 ` [PATCH 3/3] watchdog: add lockup_sys_info sysctl to dump sys info on system lockup Feng Tang
2025-11-11 13:26   ` Petr Mladek
2025-11-11 14:09     ` Feng Tang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aRInLdgKCzaVeyG0@pathway.suse.cz \
    --to=pmladek@suse.com \
    --cc=akpm@linux-foundation.org \
    --cc=feng.tang@linux.alibaba.com \
    --cc=ioworker0@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.