From: Aaron Tomlin <atomlin@atomlin.com>
To: akpm@linux-foundation.org, lance.yang@linux.dev,
mhiramat@kernel.org, pmladek@suse.com
Cc: linux-kernel@vger.kernel.org, atomlin@atomlin.com,
neelx@suse.com, sean@ashe.io, chjohnst@gmail.com, steve@abita.co,
mproche@gmail.com, nick.lange@gmail.com
Subject: [PATCH] hung_task: Add per-round stack trace deduplication
Date: Wed, 17 Jun 2026 14:48:41 -0400 [thread overview]
Message-ID: <20260617184841.1447955-1-atomlin@atomlin.com> (raw)
Currently, when multiple tasks hang in the exact same location (e.g.,
under severe contention for a mutex), khungtaskd indiscriminately
reports every single instance. This wastes ring buffer space with
identical stack traces up to the defined warning limit (i.e.,
kernel.hung_task_warnings), obscuring the root cause without providing
any additional diagnostic value.
Introduce a lightweight, hash-based stack trace deduplicator for
khungtaskd to ensure only unique stack traces are reported during
a single detection interval.
Technical details of the implementation:
- Uses a 12-bit hash table (4096 slots), consuming just 16 KB of
static memory to prevent cache thrashing during massive hangs.
- Operates purely serially within the single khungtaskd thread,
requiring zero atomic operations or concurrent locking overhead.
- Flushes the lossy cache via memset() at the beginning of each
detection round. This ensures the immediate "thundering herd" of
duplicates is suppressed, but guarantees the system will not
permanently suppress identical hangs that occur in future rounds.
- Introduces a new sysctl, kernel.hung_task_dedup, which defaults to 1
(enabled). The value is sampled once at the start of each detection
round so that a concurrent userspace write cannot change the behaviour
part-way through a round.
Signed-off-by: Aaron Tomlin <atomlin@atomlin.com>
--
---
kernel/hung_task.c | 82 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 79 insertions(+), 3 deletions(-)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 6fcc94ce4ca9..96ec133965d7 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -25,6 +25,9 @@
#include <linux/hung_task.h>
#include <linux/rwsem.h>
#include <linux/sys_info.h>
+#include <linux/stacktrace.h>
+#include <linux/jhash.h>
+#include <linux/hash.h>
#include <trace/events/sched.h>
@@ -59,6 +62,21 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
static int __read_mostly sysctl_hung_task_warnings = 10;
+/*
+ * Sizing the deduplicator hash table.
+ * 12 bits provides 4096 slots, costing just 16 KB of static memory.
+ * Each slot holds the 32-bit hash of one stack trace. The table is zeroed
+ * at the start of every detection round in which deduplication is enabled,
+ * so a zero slot means "empty".
+ */
+#define HUNG_TASK_HT_BITS 12
+#define HUNG_TASK_HT_SIZE (1UL << HUNG_TASK_HT_BITS)
+
+static u32 hung_task_hash_table[HUNG_TASK_HT_SIZE];
+
+/*
+ * Enable or disable stack trace deduplication.
+ * Defaults to 1 (enabled).
+ */
+static int __read_mostly sysctl_hung_task_dedup = 1;
+
static int __read_mostly did_panic;
static bool hung_task_call_panic;
@@ -223,17 +241,54 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
}
#endif
+/**
+ * hung_task_stack_is_unique - Check if a stack trace has been seen this round
+ * @t: Pointer to the task structure
+ *
+ * Captures the stack trace of the task, hashes it, and checks our lossy
+ * cache. Since this is only called serially from the khungtaskd thread,
+ * no concurrent locks or atomics are required.
+ *
+ * The cache keeps one 32-bit hash per slot and zero marks an empty slot,
+ * so a computed hash of zero is remapped to 1. Two different traces that
+ * produce the same 32-bit hash are indistinguishable and the later one is
+ * treated as a duplicate.
+ *
+ * Return: true if the stack has not been seen this round, if the slot held
+ * a different hash (which is then overwritten), or if no stack entries
+ * could be captured; false if it is a known duplicate.
+ */
+static bool hung_task_stack_is_unique(struct task_struct *t)
+{
+	unsigned long entries[64];
+	unsigned int nr_entries;
+	u32 hash, idx;
+
+	nr_entries = stack_trace_save_tsk(t, entries, ARRAY_SIZE(entries), 0);
+
+	/*
+	 * An empty trace gives nothing to compare: every such task would hash
+	 * to the same value and all but the first would be silently dropped.
+	 * Report them instead and leave the cache untouched.
+	 */
+	if (!nr_entries)
+		return true;
+
+	hash = jhash2((u32 *)entries,
+		      nr_entries * (sizeof(unsigned long) / sizeof(u32)),
+		      JHASH_INITVAL);
+
+	/* Zero is reserved for "empty slot"; see the per-round memset(). */
+	if (unlikely(!hash))
+		hash = 1;
+
+	idx = hash_32(hash, HUNG_TASK_HT_BITS);
+
+	if (hung_task_hash_table[idx] == hash)
+		return false;
+
+	hung_task_hash_table[idx] = hash;
+
+	return true;
+}
+
/**
* hung_task_info - Print diagnostic details for a hung task
* @t: Pointer to the detected hung task.
* @timeout: Timeout threshold for detecting hung tasks
* @this_round_count: Count of hung tasks detected in the current iteration
+ * @dedup_enabled: Snapshot of sysctl_hung_task_dedup for the current interval.
*
* Print structured information about the specified hung task, if warnings
- * are enabled or if the panic batch threshold is exceeded.
+ * are enabled or if the panic batch threshold is exceeded. If @dedup_enabled
+ * is true, identical stack traces within the same detection round are
+ * intercepted and suppressed to save ring buffer space.
*/
static void hung_task_info(struct task_struct *t, unsigned long timeout,
- unsigned long this_round_count)
+ unsigned long this_round_count,
+ bool dedup_enabled)
+
{
trace_sched_process_hang(t);
@@ -242,6 +297,10 @@ static void hung_task_info(struct task_struct *t, unsigned long timeout,
hung_task_call_panic = true;
}
+ /* Intercept and drop duplicate stack traces */
+ if (dedup_enabled && !hung_task_stack_is_unique(t))
+ return;
+
/*
* The given task did not get scheduled for more than
* CONFIG_DEFAULT_HUNG_TASK_TIMEOUT. Therefore, complain
@@ -306,6 +365,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
unsigned long this_round_count;
int need_warning = sysctl_hung_task_warnings;
unsigned long si_mask = hung_task_si_mask;
+ bool dedup_enabled = READ_ONCE(sysctl_hung_task_dedup);
/*
* If the system crashed already then all bets are off,
@@ -314,6 +374,13 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
if (test_taint(TAINT_DIE) || did_panic)
return;
+ /*
+ * Reset the de-duplicator hash table for this new detection round.
+ * This prevents stale hashes from permanently suppressing identical
+ * hangs that recur in later detection rounds.
+ */
+ if (dedup_enabled)
+ memset(hung_task_hash_table, 0, sizeof(hung_task_hash_table));
+
this_round_count = 0;
rcu_read_lock();
for_each_process_thread(g, t) {
@@ -334,7 +401,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
*/
atomic_long_inc(&sysctl_hung_task_detect_count);
this_round_count++;
- hung_task_info(t, timeout, this_round_count);
+ hung_task_info(t, timeout, this_round_count, dedup_enabled);
}
}
unlock:
@@ -483,6 +550,15 @@ static const struct ctl_table hung_task_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_NEG_ONE,
},
+ {
+ .procname = "hung_task_dedup",
+ .data = &sysctl_hung_task_dedup,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{
.procname = "hung_task_detect_count",
.maxlen = sizeof(unsigned long),
--
2.51.0
next reply other threads:[~2026-06-17 18:49 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-17 18:48 Aaron Tomlin [this message]
2026-06-17 22:04 ` [PATCH] hung_task: Add per-round stack trace deduplication David Laight
2026-06-18 0:59 ` Masami Hiramatsu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260617184841.1447955-1-atomlin@atomlin.com \
--to=atomlin@atomlin.com \
--cc=akpm@linux-foundation.org \
--cc=chjohnst@gmail.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=mproche@gmail.com \
--cc=neelx@suse.com \
--cc=nick.lange@gmail.com \
--cc=pmladek@suse.com \
--cc=sean@ashe.io \
--cc=steve@abita.co \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox