* [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
@ 2025-05-02 10:39 Max Kellermann
2025-05-02 10:39 ` [PATCH 2/2] kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count Max Kellermann
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Max Kellermann @ 2025-05-02 10:39 UTC (permalink / raw)
To: akpm, song, joel.granados, dianders, cminyard, linux-kernel
Cc: Max Kellermann
There is /proc/sys/kernel/hung_task_detect_count,
/sys/kernel/warn_count and /sys/kernel/oops_count but there is no
userspace-accessible counter for hard/soft lockups. Having this is
useful for monitoring tools.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
kernel/watchdog.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9fa2af9dbf2c..09994bfb47af 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -63,6 +63,29 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
*/
unsigned int __read_mostly hardlockup_panic =
IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
+
+#ifdef CONFIG_SYSFS
+
+static unsigned int hardlockup_count;
+
+static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", hardlockup_count);
+}
+
+static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count);
+
+static __init int kernel_hardlockup_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_hardlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
@@ -169,6 +192,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
unsigned int this_cpu = smp_processor_id();
unsigned long flags;
+#ifdef CONFIG_SYSFS
+ ++hardlockup_count;
+#endif
+
/* Only print hardlockups once. */
if (per_cpu(watchdog_hardlockup_warned, cpu))
return;
@@ -311,6 +338,28 @@ unsigned int __read_mostly softlockup_panic =
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
+#ifdef CONFIG_SYSFS
+
+static unsigned int softlockup_count;
+
+static ssize_t softlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", softlockup_count);
+}
+
+static struct kobj_attribute softlockup_count_attr = __ATTR_RO(softlockup_count);
+
+static __init int kernel_softlockup_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &softlockup_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_softlockup_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
/* Timestamp taken after the last successful reschedule. */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
/* Timestamp of the last softlockup report. */
@@ -742,6 +791,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
touch_ts = __this_cpu_read(watchdog_touch_ts);
duration = is_softlockup(touch_ts, period_ts, now);
if (unlikely(duration)) {
+#ifdef CONFIG_SYSFS
+ ++softlockup_count;
+#endif
+
/*
* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping all cpu back traces.
--
2.47.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
2025-05-02 10:39 [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Max Kellermann
@ 2025-05-02 10:39 ` Max Kellermann
2025-05-02 14:39 ` [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Doug Anderson
2025-05-04 2:47 ` Andrew Morton
2 siblings, 0 replies; 7+ messages in thread
From: Max Kellermann @ 2025-05-02 10:39 UTC (permalink / raw)
To: akpm, song, joel.granados, dianders, cminyard, linux-kernel
Cc: Max Kellermann
Expose a simple counter of RCU stalls to userspace for monitoring tools.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
kernel/rcu/tree_stall.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 925fcdad5dea..158330524795 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
int sysctl_panic_on_rcu_stall __read_mostly;
int sysctl_max_rcu_stall_to_panic __read_mostly;
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", rcu_stall_count);
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
if (kvm_check_and_clear_guest_paused())
return;
+#ifdef CONFIG_SYSFS
+ ++rcu_stall_count;
+#endif
+
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
--
2.47.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
2025-05-02 10:39 [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Max Kellermann
2025-05-02 10:39 ` [PATCH 2/2] kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count Max Kellermann
@ 2025-05-02 14:39 ` Doug Anderson
2025-05-02 15:12 ` Max Kellermann
2025-05-04 2:47 ` Andrew Morton
2 siblings, 1 reply; 7+ messages in thread
From: Doug Anderson @ 2025-05-02 14:39 UTC (permalink / raw)
To: Max Kellermann; +Cc: akpm, song, joel.granados, cminyard, linux-kernel
Hi,
On Fri, May 2, 2025 at 3:39 AM Max Kellermann <max.kellermann@ionos.com> wrote:
>
> There is /proc/sys/kernel/hung_task_detect_count,
> /sys/kernel/warn_count and /sys/kernel/oops_count but there is no
> userspace-accessible counter for hard/soft lockups. Having this is
> useful for monitoring tools.
Hmmm. I suspect this has more to do with the fact that both hard and
soft lockups nearly always end up being fatal. ...but I guess
technically they could be recovered from and the kernel can be
configured not to panic, so I guess it would be OK to add something
like this.
It feels like there would be a better place for these to go than
straight in `/sys/kernel`, though I don't really know it. Maybe
someone else on this thread has opinions? Any chance they could go in
"debugfs"?
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
> kernel/watchdog.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index 9fa2af9dbf2c..09994bfb47af 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -63,6 +63,29 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
> */
> unsigned int __read_mostly hardlockup_panic =
> IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
> +
> +#ifdef CONFIG_SYSFS
> +
> +static unsigned int hardlockup_count;
> +
> +static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
> + char *page)
> +{
> + return sysfs_emit(page, "%u\n", hardlockup_count);
> +}
> +
> +static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count);
> +
> +static __init int kernel_hardlockup_sysfs_init(void)
> +{
> + sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL);
> + return 0;
> +}
> +
> +late_initcall(kernel_hardlockup_sysfs_init);
> +
> +#endif // CONFIG_SYSFS
> +
> /*
> * We may not want to enable hard lockup detection by default in all cases,
> * for example when running the kernel as a guest on a hypervisor. In these
> @@ -169,6 +192,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
> unsigned int this_cpu = smp_processor_id();
> unsigned long flags;
>
> +#ifdef CONFIG_SYSFS
> + ++hardlockup_count;
> +#endif
Please no embedded ifdefs like this. I personally wouldn't hate it if
the "unsigned int" was simply always defined, but if we want to keep
it only defined for sysfs then please use a function to increment this
that's declared as a static inline noop in the case that sysfs is off.
-Doug
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
2025-05-02 14:39 ` [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Doug Anderson
@ 2025-05-02 15:12 ` Max Kellermann
0 siblings, 0 replies; 7+ messages in thread
From: Max Kellermann @ 2025-05-02 15:12 UTC (permalink / raw)
To: Doug Anderson; +Cc: akpm, song, joel.granados, cminyard, linux-kernel
On Fri, May 2, 2025 at 4:40 PM Doug Anderson <dianders@chromium.org> wrote:
> Hmmm. I suspect this has more to do with the fact that both hard and
> soft lockups nearly always end up being fatal. ...but I guess
> technically they could be recovered from and the kernel can be
> configured not to panic, so I guess it would be OK to add something
> like this.
This isn't really about recovering from such a lockup but about
detecting it easily+quickly with monitoring tools. A machine that had
a soft lockup still works (for some definition of "works"), and you
cannot detect this condition without parsing dmesg.
softlockup_panic is a bigger hammer with pros and cons of its own.
> It feels like there would be a better place for these to go than
> straight in `/sys/kernel`, though I don't really know it. Maybe
> someone else on this thread has opinions? Any chance they could go in
> "debugfs"?
"oops_count" and "warn_count" in the same directory is prior art.
I don't think this belongs in debugfs, as debugfs is not considered a
stable ABI, and this feature really isn't about debugging but about
measuring server health.
> Please no embedded ifdefs like this. I personally wouldn't hate it if
> the "unsigned int" was simply always defined, but if we want to keep
> it only defined for sysfs then please use a function to increment this
> that's declared as a static inline noop in the case that sysfs is off.
That would just move the ifdef somewhere else with more code lines,
for something as trivial as incrementing an integer. It's a question
of taste, and I'll follow whatever coding style the maintainers
prefer.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
2025-05-02 10:39 [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Max Kellermann
2025-05-02 10:39 ` [PATCH 2/2] kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count Max Kellermann
2025-05-02 14:39 ` [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Doug Anderson
@ 2025-05-04 2:47 ` Andrew Morton
2025-05-04 6:36 ` Max Kellermann
2 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2025-05-04 2:47 UTC (permalink / raw)
To: Max Kellermann; +Cc: song, joel.granados, dianders, cminyard, linux-kernel
On Fri, 2 May 2025 12:39:04 +0200 Max Kellermann <max.kellermann@ionos.com> wrote:
> There is /proc/sys/kernel/hung_task_detect_count,
> /sys/kernel/warn_count and /sys/kernel/oops_count but there is no
> userspace-accessible counter for hard/soft lockups.
Documentation/, please?
> Having this is useful for monitoring tools.
Useful how? Use cases? Examples?
A proposal to permanently extend Linux's userspace API requires better
justification than an unsubstantiated assertion, surely?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
2025-05-04 2:47 ` Andrew Morton
@ 2025-05-04 6:36 ` Max Kellermann
2025-05-04 6:54 ` Andrew Morton
0 siblings, 1 reply; 7+ messages in thread
From: Max Kellermann @ 2025-05-04 6:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: song, joel.granados, dianders, cminyard, linux-kernel
On Sun, May 4, 2025 at 4:47 AM Andrew Morton <akpm@linux-foundation.org> wrote:
> Documentation/, please?
Do you mean Documentation/ABI/testing/ ? (like
Documentation/ABI/testing/sysfs-kernel-oops_count)
I'll add that; I was confused by the directory name "testing" and
didn't expect to find actual documentation there.
> > Having this is useful for monitoring tools.
>
> Useful how? Use cases? Examples?
To detect whether the machine is healthy. If the kernel has
experienced a soft lockup, it's probably due to a kernel bug, and I'd
like to detect that quickly and easily. There is currently no way to
detect that, other than parsing dmesg. Or observing indirect effects:
such as certain tasks not responding, but then I need to observe all
tasks. I'd rather be able to detect the primary cause easily - just
like some people decided that they want to observe an oops and a
warning counter.
We always run the latest stable kernel on our production servers, and
this has brought great sorrow for the last year (I think the big netfs
drama began in 6.9 or so when the pgpriv2 refactoring began). There
have been numerous netfs/NFS/Ceph regressions, we had just as many
production outages, and the maintainers wouldn't respond to my bug
reports, so I had to figure it all out myself.
The latest regression that quickly took down our servers was a
"stable" backport of a performance optimization for epoll in 6.14.4,
leading to soft lockups in ep_poll(), see
https://lore.kernel.org/lkml/20250429185827.3564438-1-max.kellermann@ionos.com/
- but we observed it only after everything had already fallen apart.
Since our main process has switched from epoll to io_uring, only
second-order processes were falling apart. Had we had a soft lockup
counter, we could have noticed it earlier.
> A proposal to permanently extend Linux's userspace API requires better
> justification than an unsubstantiated assertion, surely?
The commits that added warn_count/oops_count literally only said "is a
fairly interesting signal". See commits 9db89b411170 ("exit: Expose
"oops_count" to sysfs") and 8b05aa263361 ("panic: Expose "warn_count"
to sysfs"). That's quite an unsubstantiated assertion, too, isn't it?
I agree with you, but I thought the point for a soft lockup counter
was trivial enough to see, and I didn't think you needed more
justification than the other counters.
Max
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count
2025-05-04 6:36 ` Max Kellermann
@ 2025-05-04 6:54 ` Andrew Morton
0 siblings, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2025-05-04 6:54 UTC (permalink / raw)
To: Max Kellermann
Cc: song, joel.granados, dianders, cminyard, linux-kernel, Kees Cook
On Sun, 4 May 2025 08:36:23 +0200 Max Kellermann <max.kellermann@ionos.com> wrote:
> On Sun, May 4, 2025 at 4:47 AM Andrew Morton <akpm@linux-foundation.org> wrote:
> > Documentation/, please?
>
> Do you mean Documentation/ABI/testing/ ? (like
> Documentation/ABI/testing/sysfs-kernel-oops_count)
> I'll add that; I was confused by the directory name "testing" and
> didn't expect to find actual documentation there.
I find it helpful to grep around for similar things:
hp2:/usr/src/25> egrep -rl "hung_task_detect_count|warn_count|oops_count" Documentation
Documentation/ABI/testing/sysfs-kernel-warn_count
Documentation/ABI/testing/sysfs-kernel-oops_count
Documentation/admin-guide/sysctl/kernel.rst
I'm not sure that we've been very complete/consistent in these things.
If you have time, please check that we've covered these things
appropriately.
> > > Having this is useful for monitoring tools.
> >
> > Useful how? Use cases? Examples?
>
> To detect whether the machine is healthy. If the kernel has
> experienced a soft lockup, it's probably due to a kernel bug, and I'd
> like to detect that quickly and easily. There is currently no way to
> detect that, other than parsing dmesg. Or observing indirect effects:
> such as certain tasks not responding, but then I need to observe all
> tasks. I'd rather be able to detect the primary cause easily - just
> like some people decided that they want to observe an oops and a
> warning counter.
>
> We always run the latest stable kernel on our production servers, and
> this has brought great sorrow for the last year (I think the big netfs
> drama began in 6.9 or so when the pgpriv2 refactoring began). There
> have been numerous netfs/NFS/Ceph regressions, we had just as many
> production outages, and the maintainers wouldn't respond to my bug
> reports, so I had to figure it all out myself.
> The latest regression that quickly took down our servers was a
> "stable" backport of a performance optimization for epoll in 6.14.4,
> leading to soft lockups in ep_poll(), see
> https://lore.kernel.org/lkml/20250429185827.3564438-1-max.kellermann@ionos.com/
> - but we observed it only after everything had already fallen apart.
> Since our main process has switched from epoll to io_uring, only
> second-order processes were falling apart. Had we had a soft lockup
> counter, we could have noticed it earlier.
That's all great stuff, thanks. Please include it in the [0/N]?
> > A proposal to permanently extend Linux's userspace API requires better
> > justification than an unsubstantiated assertion, surely?
>
> The commits that added warn_count/oops_count literally only said "is a
> fairly interesting signal". See commits 9db89b411170 ("exit: Expose
> "oops_count" to sysfs") and 8b05aa263361 ("panic: Expose "warn_count"
> to sysfs"). That's quite an unsubstantiated assertion, too, isn't it?
>
> I agree with you, but I thought the point for a soft lockup counter
> was trivial enough to see, and I didn't think you needed more
> justification than the other counters.
um, well, Kees, sorry, that wasn't a world class effort.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-05-04 6:54 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-02 10:39 [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Max Kellermann
2025-05-02 10:39 ` [PATCH 2/2] kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count Max Kellermann
2025-05-02 14:39 ` [PATCH 1/2] kernel/watchdog: add /sys/kernel/{hard,soft}lockup_count Doug Anderson
2025-05-02 15:12 ` Max Kellermann
2025-05-04 2:47 ` Andrew Morton
2025-05-04 6:36 ` Max Kellermann
2025-05-04 6:54 ` Andrew Morton
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.