All of lore.kernel.org
 help / color / mirror / Atom feed
* [merged mm-nonmm-stable] kernel-watchdog-add-sys-kernel-hardsoftlockup_count.patch removed from -mm tree
@ 2025-05-21 17:49 Andrew Morton
  0 siblings, 0 replies; only message in thread
From: Andrew Morton @ 2025-05-21 17:49 UTC (permalink / raw)
  To: mm-commits, song, kees, joel.granados, dianders, cminyard,
	max.kellermann, akpm


The quilt patch titled
     Subject: kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
has been removed from the -mm tree.  Its filename was
     kernel-watchdog-add-sys-kernel-hardsoftlockup_count.patch

This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

------------------------------------------------------
From: Max Kellermann <max.kellermann@ionos.com>
Subject: kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
Date: Sun, 4 May 2025 20:08:30 +0200

Patch series "sysfs: add counters for lockups and stalls", v2.

Commits 9db89b411170 ("exit: Expose "oops_count" to sysfs") and
8b05aa263361 ("panic: Expose "warn_count" to sysfs") added counters for
oopses and warnings to sysfs, and these two patches do the same for
hard/soft lockups and RCU stalls.

All of these counters are useful for monitoring tools to detect whether
the machine is healthy.  If the kernel has experienced a lockup or a
stall, it's probably due to a kernel bug, and I'd like to detect that
quickly and easily.  There is currently no way to detect that, other than
parsing dmesg.  Or observing indirect effects: such as certain tasks not
responding, but then I need to observe all tasks, and it may take a while
until these effects become visible/measurable.  I'd rather be able to
detect the primary cause more quickly, possibly before everything falls
apart.


This patch (of 2):

There is /proc/sys/kernel/hung_task_detect_count, /sys/kernel/warn_count
and /sys/kernel/oops_count but there is no userspace-accessible counter
for hard/soft lockups.  Having this is useful for monitoring tools.

Link: https://lkml.kernel.org/r/20250504180831.4190860-1-max.kellermann@ionos.com
Link: https://lkml.kernel.org/r/20250504180831.4190860-2-max.kellermann@ionos.com
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Cc:
Cc: Core Minyard <cminyard@mvista.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Song Liu <song@kernel.org>
Cc: Kees Cook <kees@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 Documentation/ABI/testing/sysfs-kernel-hardlockup_count |    7 +
 Documentation/ABI/testing/sysfs-kernel-softlockup_count |    7 +
 kernel/watchdog.c                                       |   53 ++++++++++
 3 files changed, 67 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
new file mode 100644
--- /dev/null
+++ a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
@@ -0,0 +1,7 @@
+What:		/sys/kernel/hardlockup_count
+Date:		May 2025
+KernelVersion:	6.16
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+		Shows how many times the system has detected a hard lockup since last boot.
+		Available only if CONFIG_HARDLOCKUP_DETECTOR is enabled.
diff --git a/Documentation/ABI/testing/sysfs-kernel-softlockup_count a/Documentation/ABI/testing/sysfs-kernel-softlockup_count
new file mode 100644
--- /dev/null
+++ a/Documentation/ABI/testing/sysfs-kernel-softlockup_count
@@ -0,0 +1,7 @@
+What:		/sys/kernel/softlockup_count
+Date:		May 2025
+KernelVersion:	6.16
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+		Shows how many times the system has detected a soft lockup since last boot.
+		Available only if CONFIG_SOFTLOCKUP_DETECTOR is enabled.
--- a/kernel/watchdog.c~kernel-watchdog-add-sys-kernel-hardsoftlockup_count
+++ a/kernel/watchdog.c
@@ -64,6 +64,29 @@ int __read_mostly sysctl_hardlockup_all_
  */
 unsigned int __read_mostly hardlockup_panic =
 			IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
+
+#ifdef CONFIG_SYSFS
+
+static unsigned int hardlockup_count;
+
+static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+				     char *page)
+{
+	return sysfs_emit(page, "%u\n", hardlockup_count);
+}
+
+static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count);
+
+static __init int kernel_hardlockup_sysfs_init(void)
+{
+	sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL);
+	return 0;
+}
+
+late_initcall(kernel_hardlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
 /*
  * We may not want to enable hard lockup detection by default in all cases,
  * for example when running the kernel as a guest on a hypervisor. In these
@@ -170,6 +193,10 @@ void watchdog_hardlockup_check(unsigned
 		unsigned int this_cpu = smp_processor_id();
 		unsigned long flags;
 
+#ifdef CONFIG_SYSFS
+		++hardlockup_count;
+#endif
+
 		/* Only print hardlockups once. */
 		if (per_cpu(watchdog_hardlockup_warned, cpu))
 			return;
@@ -312,6 +339,28 @@ unsigned int __read_mostly softlockup_pa
 static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
+#ifdef CONFIG_SYSFS
+
+static unsigned int softlockup_count;
+
+static ssize_t softlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+				     char *page)
+{
+	return sysfs_emit(page, "%u\n", softlockup_count);
+}
+
+static struct kobj_attribute softlockup_count_attr = __ATTR_RO(softlockup_count);
+
+static __init int kernel_softlockup_sysfs_init(void)
+{
+	sysfs_add_file_to_group(kernel_kobj, &softlockup_count_attr.attr, NULL);
+	return 0;
+}
+
+late_initcall(kernel_softlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
 /* Timestamp taken after the last successful reschedule. */
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 /* Timestamp of the last softlockup report. */
@@ -743,6 +792,10 @@ static enum hrtimer_restart watchdog_tim
 	touch_ts = __this_cpu_read(watchdog_touch_ts);
 	duration = is_softlockup(touch_ts, period_ts, now);
 	if (unlikely(duration)) {
+#ifdef CONFIG_SYSFS
+		++softlockup_count;
+#endif
+
 		/*
 		 * Prevent multiple soft-lockup reports if one cpu is already
 		 * engaged in dumping all cpu back traces.
_

Patches currently in -mm which might be from max.kellermann@ionos.com are



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-05-21 17:49 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-21 17:49 [merged mm-nonmm-stable] kernel-watchdog-add-sys-kernel-hardsoftlockup_count.patch removed from -mm tree Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.