linux-doc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] genirq: add support for warning on long-running IRQ handlers
@ 2025-07-14  8:41 Wladislav Wiebe
  2025-07-18 20:53 ` Thomas Gleixner
  2025-07-22  8:21 ` Jiri Slaby
  0 siblings, 2 replies; 5+ messages in thread
From: Wladislav Wiebe @ 2025-07-14  8:41 UTC (permalink / raw)
  To: tglx, corbet
  Cc: akpm, paulmck, rostedt, Neeraj.Upadhyay, david, bp, arnd, fvdl,
	linux-doc, linux-kernel, wladislav.wiebe, peterz

This patch adds a mechanism to detect and warn about long-running IRQ
handlers exceeding a user-defined duration threshold in microseconds.

The feature is enabled via the kernel boot parameter:
"irqhandler.duration_warn_us=<threshold_in_us>"

For example, passing irqhandler.duration_warn_us=1000 will warn if an
IRQ handler takes more than 1000 microseconds.

Implementation uses local_clock() to measure the execution duration of
IRQ handlers. When the threshold is exceeded, a ratelimited warning is
printed:

"[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"

Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com>
---
V1 -> V2: refactor to use local_clock() instead of jiffies and replace
	  Kconfig knobs by a new command-line parameter.
V1 link:  https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/
---
 .../admin-guide/kernel-parameters.txt         |  5 ++
 kernel/irq/handle.c                           | 48 ++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..fa89f21ea1e6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2543,6 +2543,11 @@
 			for it. Intended to get systems with badly broken
 			firmware running.
 
+	irqhandler.duration_warn_us= [KNL,EARLY]
+			Warn if an IRQ handler exceeds the specified duration
+			threshold in microseconds. Useful for identifying
+			long-running IRQs in the system.
+
 	irqpoll		[HW]
 			When an interrupt is not handled search all handlers
 			for it. Also check all handlers each timer
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..eab8fdfab8d8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
 	wake_up_process(action->thread);
 }
 
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+static u64 irqhandler_duration_threshold_us __ro_after_init;
+
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+	unsigned long val;
+	int ret;
+
+	if (!arg)
+		return 0;
+
+	ret = kstrtoul(arg, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val > 0) {
+		irqhandler_duration_threshold_us = val;
+		static_branch_enable(&irqhandler_duration_check_enabled);
+	} else {
+		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
+
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+					      struct irqaction *action)
+{
+	u64 delta_us = (local_clock() - ts_start) >> 10;
+
+	if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
+		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
+			smp_processor_id(), irq, action->handler, delta_us);
+	}
+}
+
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 
 	for_each_action_of_desc(desc, action) {
 		irqreturn_t res;
+		u64 ts_start;
 
 		/*
 		 * If this IRQ would be threaded under force_irqthreads, mark it so.
@@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 			lockdep_hardirq_threaded();
 
 		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
+
+		if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+			ts_start = local_clock();
+			res = action->handler(irq, action->dev_id);
+			irqhandler_duration_check(ts_start, irq, action);
+		} else
+			res = action->handler(irq, action->dev_id);
+
 		trace_irq_handler_exit(irq, action, res);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
-- 
2.39.3.dirty


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] genirq: add support for warning on long-running IRQ handlers
  2025-07-14  8:41 [PATCH v2] genirq: add support for warning on long-running IRQ handlers Wladislav Wiebe
@ 2025-07-18 20:53 ` Thomas Gleixner
  2025-07-23 18:39   ` Wladislav Wiebe
  2025-07-22  8:21 ` Jiri Slaby
  1 sibling, 1 reply; 5+ messages in thread
From: Thomas Gleixner @ 2025-07-18 20:53 UTC (permalink / raw)
  To: Wladislav Wiebe, corbet
  Cc: akpm, paulmck, rostedt, Neeraj.Upadhyay, david, bp, arnd, fvdl,
	linux-doc, linux-kernel, wladislav.wiebe, peterz

On Mon, Jul 14 2025 at 10:41, Wladislav Wiebe wrote:
> This patch adds a mechanism to detect and warn about long-running IRQ

# git grep 'This patch' Documentation/process/

Also please read:

  https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog

> +static int __init irqhandler_duration_check_setup(char *arg)
> +{
> +	unsigned long val;
> +	int ret;
> +
> +	if (!arg)
> +		return 0;
> +
> +	ret = kstrtoul(arg, 0, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > 0) {
> +		irqhandler_duration_threshold_us = val;
> +		static_branch_enable(&irqhandler_duration_check_enabled);
> +	} else {
> +		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);

Why early_param? Nothing cares about this during early boot.

> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
> +					      struct irqaction *action)
> +{
> +	u64 delta_us = (local_clock() - ts_start) >> 10;

Lacks a comment that this is an intentional approximation.

> +	if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
> +		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
> +			smp_processor_id(), irq, action->handler, delta_us);

Please align the arguments in the second line properly.

  https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#line-breaks

> +	}
> +}
> +
>  irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>  {
>  	irqreturn_t retval = IRQ_NONE;
> @@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>  
>  	for_each_action_of_desc(desc, action) {
>  		irqreturn_t res;
> +		u64 ts_start;

This wants to be in the if() branch where it is actually used.

>  		/*
>  		 * If this IRQ would be threaded under force_irqthreads, mark it so.
> @@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>  			lockdep_hardirq_threaded();
>  
>  		trace_irq_handler_entry(irq, action);
> -		res = action->handler(irq, action->dev_id);
> +
> +		if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
> +			ts_start = local_clock();
> +			res = action->handler(irq, action->dev_id);
> +			irqhandler_duration_check(ts_start, irq, action);
> +		} else
> +			res = action->handler(irq, action->dev_id);
> +

Even if not required by C, the else clause wants brackets too for
symmetry.

        if (foo)
        	bar();
        else
                baz();

parses perfectly fine.

        if (foo) {
                do_stuff();
        	bar();
        } else
                baz();

is asymmetrical and disturbs the reading flow, which is pattern
based. The extra brackets just make it easier to parse:

        if (foo) {
                do_stuff();
        	bar();
        } else {
                baz();
        }

See?

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] genirq: add support for warning on long-running IRQ handlers
  2025-07-14  8:41 [PATCH v2] genirq: add support for warning on long-running IRQ handlers Wladislav Wiebe
  2025-07-18 20:53 ` Thomas Gleixner
@ 2025-07-22  8:21 ` Jiri Slaby
  2025-07-23 18:34   ` Wladislav Wiebe
  1 sibling, 1 reply; 5+ messages in thread
From: Jiri Slaby @ 2025-07-22  8:21 UTC (permalink / raw)
  To: Wladislav Wiebe, tglx, corbet
  Cc: akpm, paulmck, rostedt, Neeraj.Upadhyay, david, bp, arnd, fvdl,
	linux-doc, linux-kernel, peterz

On 14. 07. 25, 10:41, Wladislav Wiebe wrote:
> This patch adds a mechanism to detect and warn about long-running IRQ
> handlers exceeding a user-defined duration threshold in microseconds.
> 
> The feature is enabled via the kernel boot parameter:
> "irqhandler.duration_warn_us=<threshold_in_us>"
> 
> For example, passing irqhandler.duration_warn_us=1000 will warn if an
> IRQ handler takes more than 1000 microseconds.
> 
> Implementation uses local_clock() to measure the execution duration of
> IRQ handlers. When the threshold is exceeded, a ratelimited warning is
> printed:
> 
> "[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"
> 
> Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com>
> ---
> V1 -> V2: refactor to use local_clock() instead of jiffies and replace
> 	  Kconfig knobs by a new command-line parameter.
> V1 link:  https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/
> ---
>   .../admin-guide/kernel-parameters.txt         |  5 ++
>   kernel/irq/handle.c                           | 48 ++++++++++++++++++-
>   2 files changed, 52 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index f1f2c0874da9..fa89f21ea1e6 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2543,6 +2543,11 @@
>   			for it. Intended to get systems with badly broken
>   			firmware running.
>   
> +	irqhandler.duration_warn_us= [KNL,EARLY]
> +			Warn if an IRQ handler exceeds the specified duration
> +			threshold in microseconds. Useful for identifying
> +			long-running IRQs in the system.
> +
>   	irqpoll		[HW]
>   			When an interrupt is not handled search all handlers
>   			for it. Also check all handlers each timer
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index 9489f93b3db3..eab8fdfab8d8 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
>   	wake_up_process(action->thread);
>   }
>   
> +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
> +static u64 irqhandler_duration_threshold_us __ro_after_init;
> +
> +static int __init irqhandler_duration_check_setup(char *arg)
> +{
> +	unsigned long val;
> +	int ret;
> +
> +	if (!arg)
> +		return 0;
> +
> +	ret = kstrtoul(arg, 0, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val > 0) {
> +		irqhandler_duration_threshold_us = val;
> +		static_branch_enable(&irqhandler_duration_check_enabled);
> +	} else {
> +		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
> +		return -EINVAL;

Perhaps invert the condition and drop the "else {}"?

> +	}
> +
> +	return 0;
> +}
> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
> +
> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
> +					      struct irqaction *action)

Can be const.

> +{
> +	u64 delta_us = (local_clock() - ts_start) >> 10;
> +
> +	if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
> +		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",

s/%d/%u/.
Do you mean "of IRQ[...]"?

> +			smp_processor_id(), irq, action->handler, delta_us);
> +	}
> +}
> +
>   irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>   {
>   	irqreturn_t retval = IRQ_NONE;

thanks,
-- 
js
suse labs


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] genirq: add support for warning on long-running IRQ handlers
  2025-07-22  8:21 ` Jiri Slaby
@ 2025-07-23 18:34   ` Wladislav Wiebe
  0 siblings, 0 replies; 5+ messages in thread
From: Wladislav Wiebe @ 2025-07-23 18:34 UTC (permalink / raw)
  To: Jiri Slaby, tglx, corbet
  Cc: akpm, paulmck, rostedt, Neeraj.Upadhyay, david, bp, arnd, fvdl,
	linux-doc, linux-kernel, peterz

On 22/07/2025 10:21, Jiri Slaby wrote:

>
> On 14. 07. 25, 10:41, Wladislav Wiebe wrote:
>> This patch adds a mechanism to detect and warn about long-running IRQ
>> handlers exceeding a user-defined duration threshold in microseconds.
>>
>> The feature is enabled via the kernel boot parameter:
>> "irqhandler.duration_warn_us=<threshold_in_us>"
>>
>> For example, passing irqhandler.duration_warn_us=1000 will warn if an
>> IRQ handler takes more than 1000 microseconds.
>>
>> Implementation uses local_clock() to measure the execution duration of
>> IRQ handlers. When the threshold is exceeded, a ratelimited warning is
>> printed:
>>
>> "[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"
>>
>> Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com>
>> ---
>> V1 -> V2: refactor to use local_clock() instead of jiffies and replace
>>         Kconfig knobs by a new command-line parameter.
>> V1 link:  https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/
>> ---
>>   .../admin-guide/kernel-parameters.txt         |  5 ++
>>   kernel/irq/handle.c                           | 48 ++++++++++++++++++-
>>   2 files changed, 52 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index f1f2c0874da9..fa89f21ea1e6 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -2543,6 +2543,11 @@
>>                       for it. Intended to get systems with badly broken
>>                       firmware running.
>>
>> +     irqhandler.duration_warn_us= [KNL,EARLY]
>> +                     Warn if an IRQ handler exceeds the specified duration
>> +                     threshold in microseconds. Useful for identifying
>> +                     long-running IRQs in the system.
>> +
>>       irqpoll         [HW]
>>                       When an interrupt is not handled search all handlers
>>                       for it. Also check all handlers each timer
>> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
>> index 9489f93b3db3..eab8fdfab8d8 100644
>> --- a/kernel/irq/handle.c
>> +++ b/kernel/irq/handle.c
>> @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
>>       wake_up_process(action->thread);
>>   }
>>
>> +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
>> +static u64 irqhandler_duration_threshold_us __ro_after_init;
>> +
>> +static int __init irqhandler_duration_check_setup(char *arg)
>> +{
>> +     unsigned long val;
>> +     int ret;
>> +
>> +     if (!arg)
>> +             return 0;
>> +
>> +     ret = kstrtoul(arg, 0, &val);
>> +     if (ret)
>> +             return ret;
>> +
>> +     if (val > 0) {
>> +             irqhandler_duration_threshold_us = val;
>> +             static_branch_enable(&irqhandler_duration_check_enabled);
>> +     } else {
>> +             pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
>> +             return -EINVAL;
>
> Perhaps invert the condition and drop the "else {}"?
>
>> +     }
>> +
>> +     return 0;
>> +}
>> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
>> +
>> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
>> +                                           struct irqaction *action)
>
> Can be const.
>
>> +{
>> +     u64 delta_us = (local_clock() - ts_start) >> 10;
>> +
>> +     if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
>> +             pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
>
> s/%d/%u/.
> Do you mean "of IRQ[...]"?
>
>> +                     smp_processor_id(), irq, action->handler, delta_us);
>> +     }
>> +}
>> +
>>   irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>>   {
>>       irqreturn_t retval = IRQ_NONE;

Thanks for the comments, I've addressed them in v3:
https://lore.kernel.org/lkml/20250723182836.1177-1-wladislav.wiebe@nokia.com/

- W.W.




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] genirq: add support for warning on long-running IRQ handlers
  2025-07-18 20:53 ` Thomas Gleixner
@ 2025-07-23 18:39   ` Wladislav Wiebe
  0 siblings, 0 replies; 5+ messages in thread
From: Wladislav Wiebe @ 2025-07-23 18:39 UTC (permalink / raw)
  To: Thomas Gleixner, corbet
  Cc: akpm, paulmck, rostedt, Neeraj.Upadhyay, david, bp, arnd, fvdl,
	linux-doc, linux-kernel, peterz


On 18/07/2025 22:53, Thomas Gleixner wrote:
> On Mon, Jul 14 2025 at 10:41, Wladislav Wiebe wrote:
>> This patch adds a mechanism to detect and warn about long-running IRQ
> # git grep 'This patch' Documentation/process/
>
> Also please read:
>
>   https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog
>
>> +static int __init irqhandler_duration_check_setup(char *arg)
>> +{
>> +     unsigned long val;
>> +     int ret;
>> +
>> +     if (!arg)
>> +             return 0;
>> +
>> +     ret = kstrtoul(arg, 0, &val);
>> +     if (ret)
>> +             return ret;
>> +
>> +     if (val > 0) {
>> +             irqhandler_duration_threshold_us = val;
>> +             static_branch_enable(&irqhandler_duration_check_enabled);
>> +     } else {
>> +             pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
>> +             return -EINVAL;
>> +     }
>> +
>> +     return 0;
>> +}
>> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
> Why early_param? Nothing cares about this during early boot.
>
>> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
>> +                                           struct irqaction *action)
>> +{
>> +     u64 delta_us = (local_clock() - ts_start) >> 10;
> Lacks a comment that this is an intentional approximation.
>
>> +     if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
>> +             pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
>> +                     smp_processor_id(), irq, action->handler, delta_us);
> Please align the arguments in the second line properly.
>
>   https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#line-breaks
>
>> +     }
>> +}
>> +
>>  irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>>  {
>>       irqreturn_t retval = IRQ_NONE;
>> @@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>>
>>       for_each_action_of_desc(desc, action) {
>>               irqreturn_t res;
>> +             u64 ts_start;
> This wants to be in the if() branch where it is actually used.
>
>>               /*
>>                * If this IRQ would be threaded under force_irqthreads, mark it so.
>> @@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
>>                       lockdep_hardirq_threaded();
>>
>>               trace_irq_handler_entry(irq, action);
>> -             res = action->handler(irq, action->dev_id);
>> +
>> +             if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
>> +                     ts_start = local_clock();
>> +                     res = action->handler(irq, action->dev_id);
>> +                     irqhandler_duration_check(ts_start, irq, action);
>> +             } else
>> +                     res = action->handler(irq, action->dev_id);
>> +
> Even if not required by C, the else clause wants brackets too for
> symmetry.
>
>         if (foo)
>                 bar();
>         else
>                 baz();
>
> parses perfectly fine.
>
>         if (foo) {
>                 do_stuff();
>                 bar();
>         } else
>                 baz();
>
> is asymmetrical and disturbs the reading flow, which is pattern
> based. The extra brackets just make it easier to parse:
>
>         if (foo) {
>                 do_stuff();
>                 bar();
>         } else {
>                 baz();
>         }
>
> See?
>
> Thanks,
>
>         tglx

Thanks for further comments, I've addressed them in v3:
https://lore.kernel.org/lkml/20250723182836.1177-1-wladislav.wiebe@nokia.com/
- W.W.


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-07-23 18:39 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-07-14  8:41 [PATCH v2] genirq: add support for warning on long-running IRQ handlers Wladislav Wiebe
2025-07-18 20:53 ` Thomas Gleixner
2025-07-23 18:39   ` Wladislav Wiebe
2025-07-22  8:21 ` Jiri Slaby
2025-07-23 18:34   ` Wladislav Wiebe

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).