* [PATCH v4] hung_task: Deduplicate identical hang reports using explicit blocker tracking
@ 2026-06-27 20:57 Aaron Tomlin
0 siblings, 0 replies; only message in thread
From: Aaron Tomlin @ 2026-06-27 20:57 UTC (permalink / raw)
To: akpm, lance.yang, mhiramat, pmladek
Cc: linux-kernel, david.laight.linux, atomlin, neelx, sean, chjohnst,
steve, mproche, nick.lange
Currently, during severe lock contention, multiple tasks can hang while
waiting on the exact same resource. The khungtaskd kthread
indiscriminately reports every single instance with a stack trace.
This can roll the kernel ring buffer and prematurely exhaust the
kernel.hung_task_warnings budget. Consequently, the kernel is left
entirely blind to subsequent, unrelated deadlocks.
To preserve the warning budget and ring buffer without sacrificing
observability, this patch introduces a two-tier deduplication mechanism
(items 1 and 2) and defines how duplicates are still reported (item 3):
1. Introduce a hung_task_reported flag in task_struct. If a task remains
hung across multiple check intervals, khungtaskd suppresses
redundant stack traces for that specific task until it makes
progress (verified via context switch counters). The flag is
packed into an existing alignment hole in task_struct, so it
consumes no additional memory.
2. For tasks detected in the same scan, we leverage the existing
CONFIG_DETECT_HUNG_TASK_BLOCKER infrastructure. By hashing the
exact memory address of the lock causing the block, extracted
from t->blocker, the kernel deterministically groups tasks
waiting on the identical resource
3. For duplicate tasks, we still print the single-line "INFO: task
..." message and trigger the trace_sched_process_hang() tracepoint.
Only the calls to sched_show_task() and debug_show_blocker() are
skipped; a concise suppression notice is printed instead.
Signed-off-by: Aaron Tomlin <atomlin@atomlin.com>
---
Changes since v3:
- Deduct from the global budget only if a full stack trace is printed
- Pivoted from heuristic Wait Channel hashing to deterministic
blocker address hashing via CONFIG_DETECT_HUNG_TASK_BLOCKER
- Replaced the hung_task_reported bit-field with a standalone u8.
Moved hung_task_reported into an existing structural alignment hole
within task_struct following blocked_lock, resulting in zero overall
memory footprint increase and optimal cacheline grouping
- Linked to v3: https://lore.kernel.org/lkml/20260621213756.43225-1-atomlin@atomlin.com/
Changes since v2:
- Replaced the per-round cache flush with a task_struct bit-field for
persistent cross-scan tracking, mitigating delayed budget exhaustion
- Abandoned exact-stack hashing in favour of Wait Channel hashing
- Transitioned from jhash() to hash_long() to optimise single-pointer
hashing, and relocated the hash map to the local stack
- Linked to v2: https://lore.kernel.org/lkml/20260620013559.1537893-1-atomlin@atomlin.com/
Changes since v1:
- Preserve "INFO:" headers for all hung tasks; suppress only the stack
dumps for duplicates (Masami Hiramatsu)
- Print a clear notification when a trace is explicitly suppressed
- Add #ifdef CONFIG_STACKTRACE guards to prevent Kconfig build errors
- Optimise overhead by unwinding the stack only if a warning is
actually going to be printed
- Linked to v1: https://lore.kernel.org/lkml/20260617184841.1447955-1-atomlin@atomlin.com/
---
include/linux/hung_task.h | 1 +
include/linux/sched.h | 3 +++
kernel/hung_task.c | 43 ++++++++++++++++++++++++++++++++++-----
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h
index c4403eeb7144..3b161f284a7b 100644
--- a/include/linux/hung_task.h
+++ b/include/linux/hung_task.h
@@ -36,6 +36,7 @@
#define BLOCKER_TYPE_RWSEM_WRITER 0x03UL
#define BLOCKER_TYPE_MASK 0x03UL
+#define BLOCKER_HASH_BITS 6
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static inline void hung_task_set_blocker(void *lock, unsigned long type)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3204a15d512..deb092ff8a9a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1252,6 +1252,9 @@ struct task_struct {
struct mutex *blocked_on; /* lock we're blocked on */
raw_spinlock_t blocked_lock;
+#ifdef CONFIG_DETECT_HUNG_TASK
+ u8 hung_task_reported;
+#endif
/*
* The task that is boosting this task; a back link for the current
* donor stack. Set in schedule() -> find_proxy_task() and only stable
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 6fcc94ce4ca9..a94f570b33d5 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -25,6 +25,7 @@
#include <linux/hung_task.h>
#include <linux/rwsem.h>
#include <linux/sys_info.h>
+#include <linux/hash.h>
#include <trace/events/sched.h>
@@ -125,6 +126,7 @@ static bool task_is_hung(struct task_struct *t, unsigned long timeout)
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
t->last_switch_time = jiffies;
+ t->hung_task_reported = 0;
return false;
}
if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
@@ -228,12 +230,14 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
* @t: Pointer to the detected hung task.
* @timeout: Timeout threshold for detecting hung tasks
* @this_round_count: Count of hung tasks detected in the current iteration
+ * @skip_show_task: Indicating if stack trace should be skipped
*
* Print structured information about the specified hung task, if warnings
* are enabled or if the panic batch threshold is exceeded.
*/
static void hung_task_info(struct task_struct *t, unsigned long timeout,
- unsigned long this_round_count)
+ unsigned long this_round_count,
+ unsigned int skip_show_task)
{
trace_sched_process_hang(t);
@@ -248,7 +252,11 @@ static void hung_task_info(struct task_struct *t, unsigned long timeout,
* accordingly
*/
if (sysctl_hung_task_warnings || hung_task_call_panic) {
- if (sysctl_hung_task_warnings > 0)
+ /*
+ * Do not exhaust the global warning budget for duplicates;
+ * only decrement if a full stack trace is being printed.
+ */
+ if (!skip_show_task && sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked%s for more than %ld seconds.\n",
t->comm, t->pid, t->in_iowait ? " in I/O wait" : "",
@@ -261,8 +269,12 @@ static void hung_task_info(struct task_struct *t, unsigned long timeout,
pr_err(" Blocked by coredump.\n");
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
- sched_show_task(t);
- debug_show_blocker(t, timeout);
+ if (!skip_show_task) {
+ sched_show_task(t);
+ debug_show_blocker(t, timeout);
+ } else {
+ pr_err(" Stack trace suppressed. Already reported or duplicate\n");
+ }
if (!sysctl_hung_task_warnings)
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
@@ -306,6 +318,11 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
unsigned long this_round_count;
int need_warning = sysctl_hung_task_warnings;
unsigned long si_mask = hung_task_si_mask;
+ unsigned int skip_show_task;
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ unsigned long blocker, blocker_hash[1 << BLOCKER_HASH_BITS] = { 0 };
+ unsigned int hash;
+#endif
/*
* If the system crashed already then all bets are off,
@@ -326,6 +343,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
}
if (task_is_hung(t, timeout)) {
+ skip_show_task = t->hung_task_reported;
/*
* Increment the global counter so that userspace could
* start migrating tasks ASAP. But count the current
@@ -334,7 +352,22 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
*/
atomic_long_inc(&sysctl_hung_task_detect_count);
this_round_count++;
- hung_task_info(t, timeout, this_round_count);
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ blocker = READ_ONCE(t->blocker);
+ if (blocker) {
+ blocker &= ~BLOCKER_TYPE_MASK;
+ hash = hash_long(blocker, BLOCKER_HASH_BITS);
+ if (blocker_hash[hash] == blocker)
+ skip_show_task = 1;
+ else
+ blocker_hash[hash] = blocker;
+ }
+#endif
+
+ hung_task_info(t, timeout, this_round_count,
+ skip_show_task);
+ t->hung_task_reported = 1;
}
}
unlock:
--
2.51.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-06-27 20:57 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-27 20:57 [PATCH v4] hung_task: Deduplicate identical hang reports using explicit blocker tracking Aaron Tomlin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.