public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Liu Song <liusong@linux.alibaba.com>
To: Bitao Hu <yaoma@linux.alibaba.com>,
	dianders@chromium.org, akpm@linux-foundation.org,
	pmladek@suse.com, tglx@linutronix.de, maz@kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/3] watchdog/softlockup: low-overhead detection of interrupt storm
Date: Wed, 24 Jan 2024 09:43:55 +0800	[thread overview]
Message-ID: <8d6afe80-58ca-455d-a6ae-797d7c5ce388@linux.alibaba.com> (raw)
In-Reply-To: <20240123121223.22318-2-yaoma@linux.alibaba.com>


在 2024/1/23 20:12, Bitao Hu 写道:
> The following softlockup is caused by interrupt storm, but it cannot be
> identified from the call tree. Because the call tree is just a snapshot
> and doesn't fully capture the behavior of the CPU during the soft lockup.
>    watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921]
>    ...
>    Call trace:
>      __do_softirq+0xa0/0x37c
>      __irq_exit_rcu+0x108/0x140
>      irq_exit+0x14/0x20
>      __handle_domain_irq+0x84/0xe0
>      gic_handle_irq+0x80/0x108
>      el0_irq_naked+0x50/0x58
>
> Therefore,I think it is necessary to report CPU utilization during the
> softlockup_thresh period (report once every sample_period, for a total
> of 5 reportings), like this:
>    watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921]
>    CPU#28 Utilization every 4s during lockup:
>      #1: 0.0% system, 0.0% softirq, 100.0% hardirq, 0.0% idle
>      #2: 0.0% system, 0.0% softirq, 100.0% hardirq, 0.0% idle
>      #3: 0.0% system, 0.0% softirq, 100.0% hardirq, 0.0% idle
>      #4: 0.0% system, 0.0% softirq, 100.0% hardirq, 0.0% idle
>      #5: 0.0% system, 0.0% softirq, 100.0% hardirq, 0.0% idle
>    ...
>
> This would be helpful in determining whether an interrupt storm has
> occurred or in identifying the cause of the softlockup. The criteria for
> determination are as follows:
>    a. If the hardirq utilization is high, then interrupt storm should be
>    considered and the root cause cannot be determined from the call tree.
>    b. If the softirq utilization is high, then we could analyze the call
>    tree but it may cannot reflect the root cause.
>    c. If the system utilization is high, then we could analyze the root
>    cause from the call tree.
>
> Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com>
> ---
>   kernel/watchdog.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 58 insertions(+)
>
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index 81a8862295d6..9fad10e0a147 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -23,6 +23,8 @@
>   #include <linux/sched/debug.h>
>   #include <linux/sched/isolation.h>
>   #include <linux/stop_machine.h>
> +#include <linux/kernel_stat.h>
> +#include <linux/math64.h>
>   
>   #include <asm/irq_regs.h>
>   #include <linux/kvm_para.h>
> @@ -441,6 +443,58 @@ static int is_softlockup(unsigned long touch_ts,
>   	return 0;
>   }
>   
> +#ifdef CONFIG_IRQ_TIME_ACCOUNTING
> +static DEFINE_PER_CPU(u64, cpustat_old[NR_STATS]);
> +static DEFINE_PER_CPU(u64, cpustat_diff[5][NR_STATS]);
> +static DEFINE_PER_CPU(int, cpustat_tail);
> +
> +static void update_cpustat(void)
> +{
> +	u64 *old = this_cpu_ptr(cpustat_old);
> +	u64 (*diff)[NR_STATS] = this_cpu_ptr(cpustat_diff);
> +	int tail = this_cpu_read(cpustat_tail), i;
> +	struct kernel_cpustat kcpustat;
> +	u64 *cpustat = kcpustat.cpustat;
> +
> +	kcpustat_cpu_fetch(&kcpustat, smp_processor_id());
> +	for (i = 0; i < NR_STATS; i++) {
> +		diff[tail][i] = cpustat[i] - old[i];
> +		old[i] = cpustat[i];
> +	}
> +	this_cpu_write(cpustat_tail, (tail + 1) % 5);
The number 5 here is related to the 5 in cpustat_diff[5], and it is 
recommended to use a macro definition instead of using the number 5 
directly.
> +}
> +
> +static void print_cpustat(void)
> +{
> +	int i, j, k;
> +	u64 a[5][NR_STATS], b[5][NR_STATS];
Use define instead of the literal number 5.
> +	u64 (*diff)[NR_STATS] = this_cpu_ptr(cpustat_diff);
> +	int tail = this_cpu_read(cpustat_tail);
> +	u32 period_us = sample_period / 1000;
Use NSEC_PER_USEC
> +
> +	for (i = 0; i < 5; i++) {
> +		for (j = 0; j < NR_STATS; j++) {
> +			a[i][j] = 100 * (diff[i][j] / 1000);
> +			b[i][j] = 10 * do_div(a[i][j], period_us);
> +			do_div(b[i][j], period_us);
> +		}
> +	}
> +	printk(KERN_CRIT "CPU#%d Utilization every %us during lockup:\n",
better use "pr_crit", and was the original intent here microseconds (us) 
or milliseconds (ms)?
> +		smp_processor_id(), period_us/1000000);
better use "period_us /NSEC_PER_MSEC"?
> +	for (k = 0, i = tail; k < 5; k++, i = (i + 1) % 5) {

It seems that only i and j are necessary, k is not essential.

> +		printk(KERN_CRIT "\t#%d: %llu.%llu%% system,\t%llu.%llu%% softirq,\t"
> +			"%llu.%llu%% hardirq,\t%llu.%llu%% idle\n", k+1,
> +			a[i][CPUTIME_SYSTEM], b[i][CPUTIME_SYSTEM],
> +			a[i][CPUTIME_SOFTIRQ], b[i][CPUTIME_SOFTIRQ],
> +			a[i][CPUTIME_IRQ], b[i][CPUTIME_IRQ],
> +			a[i][CPUTIME_IDLE], b[i][CPUTIME_IDLE]);
> +	}
> +}
> +#else
> +static inline void update_cpustat(void) { }
> +static inline void print_cpustat(void) { }
> +#endif
> +
>   /* watchdog detector functions */
>   static DEFINE_PER_CPU(struct completion, softlockup_completion);
>   static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
> @@ -504,6 +558,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>   	 */
>   	period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));
>   
> +	/* update cpu usage stat */
The function name already indicates that it involves graphs, so the 
comment here appears superfluous.
If a comment is absolutely necessary, please provide more detailed 
information.
> +	update_cpustat();
> +
>   	/* Reset the interval when touched by known problematic code. */
>   	if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
>   		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
> @@ -539,6 +596,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>   		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
>   			smp_processor_id(), duration,
>   			current->comm, task_pid_nr(current));
> +		print_cpustat();
>   		print_modules();
>   		print_irqtrace_events(current);
>   		if (regs)

  reply	other threads:[~2024-01-24  1:44 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-23 12:12 [PATCH 0/3] *** Detect interrupt storm in softlockup *** Bitao Hu
2024-01-23 12:12 ` [PATCH 1/3] watchdog/softlockup: low-overhead detection of interrupt storm Bitao Hu
2024-01-24  1:43   ` Liu Song [this message]
2024-01-24  2:48     ` yaoma
2024-01-25  0:19   ` Doug Anderson
2024-01-25  8:31     ` Bitao Hu
2024-01-25 15:08       ` Doug Anderson
2024-01-26  5:25   ` kernel test robot
2024-01-23 12:12 ` [PATCH 2/3] watchdog/softlockup: report the most time-consuming hardirq Bitao Hu
2024-01-25  0:19   ` Doug Anderson
2024-01-25  7:50     ` Bitao Hu
2024-01-25  9:19     ` Bitao Hu
2024-02-14 23:36   ` Thomas Gleixner
2024-02-14 23:39     ` Doug Anderson
2024-02-15  1:02       ` Thomas Gleixner
2024-01-23 12:12 ` [PATCH 3/3] watchdog/softlockup: add parameter to control the reporting of " Bitao Hu
2024-01-26  5:25   ` kernel test robot
2024-01-26  6:07   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8d6afe80-58ca-455d-a6ae-797d7c5ce388@linux.alibaba.com \
    --to=liusong@linux.alibaba.com \
    --cc=akpm@linux-foundation.org \
    --cc=dianders@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=pmladek@suse.com \
    --cc=tglx@linutronix.de \
    --cc=yaoma@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox