All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] serial: 8250: serialize shared IRQ startup
@ 2026-05-27  9:20 Wang Zhaolong
  2026-06-24  2:49 ` Wang Zhaolong
  0 siblings, 1 reply; 2+ messages in thread
From: Wang Zhaolong @ 2026-05-27  9:20 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby, Ilpo Järvinen, Xin Zhao,
	Andy Shevchenko, Kees Cook, Ingo Molnar, Bing Fan, Guanbing Huang,
	linux-kernel, linux-serial
  Cc: Wang Zhaolong, stable

Concurrent startup of two 8250 ports sharing the same IRQ can trigger an
IRQ core warning:

  Unbalanced enable for IRQ 3
  WARNING: CPU: 0 PID: 580 at kernel/irq/manage.c:774 __enable_irq+0x3b/0x60
  Call Trace:
   enable_irq+0x8d/0x120
   serial8250_do_startup+0x80d/0xa80
   uart_port_startup+0x13d/0x440
   uart_port_activate+0x5b/0xb0
   tty_port_open+0xa1/0x120
   uart_open+0x1e/0x30
   tty_open+0x140/0x7a0

The second port can then run the shared-IRQ startup test while the IRQ core
is still enabling the line for the first port.  The local
disable_irq_nosync()/enable_irq() pair is balanced, but the interleaving can
still unbalance the IRQ core disable depth.

That makes the QEMU legacy serial ports enter the shared-IRQ THRE test path:

  serial8250_do_startup()
    if (port->irqflags & IRQF_SHARED)
      disable_irq_nosync(port->irq)
    ...
    if (port->irqflags & IRQF_SHARED)
      enable_irq(port->irq)

One possible interleaving is:

  CPU0, ttyS1                         CPU1, ttyS3

  serial_link_irq_chain()
    hash_add(i)
    i->head = &ttyS1
    request_irq()
                                        serial_link_irq_chain()
                                          find i in irq_lists
                                          list_add(&ttyS3, i->head)
                                        serial8250_do_startup()
                                          disable_irq_nosync(irq)
    irq_startup()
      desc->depth = 0
                                          enable_irq(irq)
                                            WARN: Unbalanced enable for IRQ 3

Keep hash_mutex held in serial_link_irq_chain() until the first request_irq()
has completed.  This prevents another 8250 port sharing the IRQ from joining
the chain and running the THRE test while the IRQ core is still starting the
interrupt.

This was reproduced in QEMU with ttyS1 and ttyS3 sharing IRQ 3.  With this
change, 100000 synchronized open/close iterations on /dev/ttyS1 and /dev/ttyS3
completed without the warning.

Fixes: 64c79dfbc458 ("serial: 8250_pnp: Support configurable reg shift property")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221579
Cc: stable@vger.kernel.org # 6.10+
Assisted-by: Codex:gpt-5
Signed-off-by: Wang Zhaolong <wangzhaolong@fnnas.com>
---
 drivers/tty/serial/8250/8250_core.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index a428e88938eb..64eed4dc343f 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -132,12 +132,10 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
  */
 static struct irq_info *serial_get_or_create_irq_info(const struct uart_8250_port *up)
 {
 	struct irq_info *i;
 
-	guard(mutex)(&hash_mutex);
-
 	hash_for_each_possible(irq_lists, i, node, up->port.irq)
 		if (i->irq == up->port.irq)
 			return i;
 
 	i = kzalloc_obj(*i);
@@ -154,10 +152,12 @@ static struct irq_info *serial_get_or_create_irq_info(const struct uart_8250_por
 static int serial_link_irq_chain(struct uart_8250_port *up)
 {
 	struct irq_info *i;
 	int ret;
 
+	guard(mutex)(&hash_mutex);
+
 	i = serial_get_or_create_irq_info(up);
 	if (IS_ERR(i))
 		return PTR_ERR(i);
 
 	scoped_guard(spinlock_irq, &i->lock) {
@@ -169,10 +169,15 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
 
 		INIT_LIST_HEAD(&up->list);
 		i->head = &up->list;
 	}
 
+	/*
+	 * Keep the shared-IRQ chain locked until the first handler is installed.
+	 * Otherwise another UART can join early and run startup IRQ masking while
+	 * the IRQ core is still enabling the line, unbalancing the disable depth.
+	 */
 	ret = request_irq(up->port.irq, serial8250_interrupt, up->port.irqflags, up->port.name, i);
 	if (ret < 0)
 		serial_do_unlink(i, up);
 
 	return ret;
-- 
2.54.0

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] serial: 8250: serialize shared IRQ startup
  2026-05-27  9:20 [PATCH] serial: 8250: serialize shared IRQ startup Wang Zhaolong
@ 2026-06-24  2:49 ` Wang Zhaolong
  0 siblings, 0 replies; 2+ messages in thread
From: Wang Zhaolong @ 2026-06-24  2:49 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby, Ilpo Järvinen, Xin Zhao,
	Andy Shevchenko, Kees Cook, Ingo Molnar, Bing Fan, Guanbing Huang,
	linux-kernel, linux-serial
  Cc: stable

On Wed, May 27, 2026 at 05:20:51PM +0800, Wang Zhaolong wrote:
> Concurrent startup of two 8250 ports sharing the same IRQ can trigger an
> IRQ core warning:
> 
>   Unbalanced enable for IRQ 3
>   WARNING: CPU: 0 PID: 580 at kernel/irq/manage.c:774 __enable_irq+0x3b/0x60
>   Call Trace:
>    enable_irq+0x8d/0x120
>    serial8250_do_startup+0x80d/0xa80
>    uart_port_startup+0x13d/0x440
>    uart_port_activate+0x5b/0xb0
>    tty_port_open+0xa1/0x120
>    uart_open+0x1e/0x30
>    tty_open+0x140/0x7a0
> 
> The second port can then run the shared-IRQ startup test while the IRQ core
> is still enabling the line for the first port.  The local
> disable_irq_nosync()/enable_irq() pair is balanced, but the interleaving can
> still unbalance the IRQ core disable depth.
> 
> That makes the QEMU legacy serial ports enter the shared-IRQ THRE test path:
> 
>   serial8250_do_startup()
>     if (port->irqflags & IRQF_SHARED)
>       disable_irq_nosync(port->irq)
>     ...
>     if (port->irqflags & IRQF_SHARED)
>       enable_irq(port->irq)
> 
> One possible interleaving is:
> 
>   CPU0, ttyS1                         CPU1, ttyS3
> 
>   serial_link_irq_chain()
>     hash_add(i)
>     i->head = &ttyS1
>     request_irq()
>                                         serial_link_irq_chain()
>                                           find i in irq_lists
>                                           list_add(&ttyS3, i->head)
>                                         serial8250_do_startup()
>                                           disable_irq_nosync(irq)
>     irq_startup()
>       desc->depth = 0
>                                           enable_irq(irq)
>                                             WARN: Unbalanced enable for IRQ 3
> 
> Keep hash_mutex held in serial_link_irq_chain() until the first request_irq()
> has completed.  This prevents another 8250 port sharing the IRQ from joining
> the chain and running the THRE test while the IRQ core is still starting the
> interrupt.
> 
> This was reproduced in QEMU with ttyS1 and ttyS3 sharing IRQ 3.  With this
> change, 100000 synchronized open/close iterations on /dev/ttyS1 and /dev/ttyS3
> completed without the warning.
> 
> Fixes: 64c79dfbc458 ("serial: 8250_pnp: Support configurable reg shift property")
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221579
> Cc: stable@vger.kernel.org # 6.10+
> Assisted-by: Codex:gpt-5
> Signed-off-by: Wang Zhaolong <wangzhaolong@fnnas.com>
> ---
>  drivers/tty/serial/8250/8250_core.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
> index a428e88938eb..64eed4dc343f 100644
> --- a/drivers/tty/serial/8250/8250_core.c
> +++ b/drivers/tty/serial/8250/8250_core.c
> @@ -132,12 +132,10 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
>   */
>  static struct irq_info *serial_get_or_create_irq_info(const struct uart_8250_port *up)
>  {
>  	struct irq_info *i;
>  
> -	guard(mutex)(&hash_mutex);
> -
>  	hash_for_each_possible(irq_lists, i, node, up->port.irq)
>  		if (i->irq == up->port.irq)
>  			return i;
>  
>  	i = kzalloc_obj(*i);
> @@ -154,10 +152,12 @@ static struct irq_info *serial_get_or_create_irq_info(const struct uart_8250_por
>  static int serial_link_irq_chain(struct uart_8250_port *up)
>  {
>  	struct irq_info *i;
>  	int ret;
>  
> +	guard(mutex)(&hash_mutex);
> +
>  	i = serial_get_or_create_irq_info(up);
>  	if (IS_ERR(i))
>  		return PTR_ERR(i);
>  
>  	scoped_guard(spinlock_irq, &i->lock) {
> @@ -169,10 +169,15 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
>  
>  		INIT_LIST_HEAD(&up->list);
>  		i->head = &up->list;
>  	}
>  
> +	/*
> +	 * Keep the shared-IRQ chain locked until the first handler is installed.
> +	 * Otherwise another UART can join early and run startup IRQ masking while
> +	 * the IRQ core is still enabling the line, unbalancing the disable depth.
> +	 */
>  	ret = request_irq(up->port.irq, serial8250_interrupt, up->port.irqflags, up->port.name, i);
>  	if (ret < 0)
>  		serial_do_unlink(i, up);
>  
>  	return ret;
> -- 
> 2.54.0

Hi Maintainers,

Friendly ping on this patch.

This is a clean and simple one-line relocation fix for the shared IRQ race condition.

I noticed there is another ongoing thread attempting to address the same bug with a
much more complex approach, but it seems to miss the regression test cases.

Could you please take a look at this simpler alternative when you have time? Any
feedback or reviews would be highly appreciated.

Thanks,
Wang

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-24  2:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-27  9:20 [PATCH] serial: 8250: serialize shared IRQ startup Wang Zhaolong
2026-06-24  2:49 ` Wang Zhaolong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.