* Re: [PATCH] x86/apic: Add retry mechanism to add_pin_to_irq_node()
2024-07-29 16:55 ` Breno Leitao
@ 2024-07-29 17:44 ` Thomas Gleixner
2024-07-30 10:28 ` Breno Leitao
2024-08-07 16:25 ` [tip: x86/apic] x86/ioapic: Handle allocation failures gracefully tip-bot2 for Thomas Gleixner
1 sibling, 1 reply; 6+ messages in thread
From: Thomas Gleixner @ 2024-07-29 17:44 UTC (permalink / raw)
To: Breno Leitao
Cc: Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
leit, Peter Zijlstra (Intel), Wei Liu, Marc Zyngier, Adrian Huang,
open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)
Breno!
On Mon, Jul 29 2024 at 09:55, Breno Leitao wrote:
>> > panic
>> > _printk
>> > panic_smp_self_stop
>> > rcu_is_watching
>> > intel_irq_remapping_free
>>
>> This completely lacks context. When does this happen? What's the system
>> state? What has intel_irq_remapping_free() to do with the allocation path?
>
> Sorry, let me clarify it a bit better:
>
> 1) This happens when the machine is booted up, and being under stress
> 2) This happens when I have failslab fault injection enabled.
> 3) The machine crashes after hitting this error.
> 4) This is reproducible with `stress-ng` using the `--aggressive` parameter
> 5) This is the full stack (sorry for not decoding the stack, but if you
> need it, I am more than happy to give you a decoded stack)
Ok. That makes sense.
>> While it seems to make sense, the reality is that this is mostly early
>> boot code. If there is a real world memory allocation failure during
>> early boot then retries will not help at all.
>
> This is not happening at early boot, this is reproducible when running
> stress-ng in this aggressive mode.
>
> Since I have failslab injecting a kmalloc fault,
> __add_pin_to_irq_noder() returns -ENOMEM, which causes the undesired
> panic().
Fine. During runtime that allocation fail should not be fatal. It just
needs to be properly propagated.
>> > Introduce a retry mechanism that attempts to add the pin up to 3 times
>> > before giving up and panicking. This should improve the robustness of
>> > the IO-APIC code in the face of transient errors.
>>
>> I'm absolutely not convinced by this loop heuristic. That's just a bad
>> hack.
>
> I will not disagree with you here, but I need to use this patch in order
> to be able t keep the system not panicking and stable while fault
> injecting slab errors and trying to reproduce a real bug in the network
> stack.
Something like the untested below should just work.
Thanks,
tglx
---
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -352,27 +352,26 @@ static void ioapic_mask_entry(int apic,
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static int __add_pin_to_irq_node(struct mp_chip_data *data,
- int node, int apic, int pin)
+static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
{
struct irq_pin_list *entry;
/* don't allow duplicates */
- for_each_irq_pin(entry, data->irq_2_pin)
+ for_each_irq_pin(entry, data->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
- return 0;
+ return true;
+ }
entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
if (!entry) {
- pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
- node, apic, pin);
- return -ENOMEM;
+ pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", node, apic, pin);
+ return false;
}
+
entry->apic = apic;
entry->pin = pin;
list_add_tail(&entry->list, &data->irq_2_pin);
-
- return 0;
+ return true;
}
static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
@@ -387,35 +386,6 @@ static void __remove_pin_from_irq(struct
}
}
-static void add_pin_to_irq_node(struct mp_chip_data *data,
- int node, int apic, int pin)
-{
- if (__add_pin_to_irq_node(data, node, apic, pin))
- panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
- int oldapic, int oldpin,
- int newapic, int newpin)
-{
- struct irq_pin_list *entry;
-
- for_each_irq_pin(entry, data->irq_2_pin) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- /* every one is different, right? */
- return;
- }
- }
-
- /* old apic/pin didn't exist, so just add new ones */
- add_pin_to_irq_node(data, node, newapic, newpin);
-}
-
static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
void (*final)(struct irq_pin_list *entry))
{
@@ -1002,8 +972,7 @@ static int alloc_isa_irq_from_domain(str
if (irq_data && irq_data->parent_data) {
if (!mp_check_pin_attr(irq, info))
return -EBUSY;
- if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
- info->ioapic.pin))
+ if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
return -ENOMEM;
} else {
info->flags |= X86_IRQ_ALLOC_LEGACY;
@@ -2131,10 +2100,10 @@ static int __init disable_timer_pin_setu
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
-static int mp_alloc_timer_irq(int ioapic, int pin)
+static int __init mp_alloc_timer_irq(int ioapic, int pin)
{
- int irq = -1;
struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+ int irq = -1;
if (domain) {
struct irq_alloc_info info;
@@ -2150,6 +2119,24 @@ static int mp_alloc_timer_irq(int ioapic
return irq;
}
+static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, data->irq_2_pin) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ return;
+ }
+ }
+
+ /* Old apic/pin didn't exist, so just add a new one */
+ add_pin_to_irq_node(data, node, newapic, newpin);
+}
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2996,9 +2983,9 @@ int mp_irqdomain_alloc(struct irq_domain
unsigned int nr_irqs, void *arg)
{
struct irq_alloc_info *info = arg;
+ int ret = -ENOMEM, ioapic, pin;
struct mp_chip_data *data;
struct irq_data *irq_data;
- int ret, ioapic, pin;
unsigned long flags;
if (!info || nr_irqs > 1)
@@ -3016,22 +3003,21 @@ int mp_irqdomain_alloc(struct irq_domain
if (!data)
return -ENOMEM;
- ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
- if (ret < 0) {
- kfree(data);
- return ret;
- }
-
INIT_LIST_HEAD(&data->irq_2_pin);
irq_data->hwirq = info->ioapic.pin;
irq_data->chip = (domain->parent == x86_vector_domain) ?
&ioapic_chip : &ioapic_ir_chip;
irq_data->chip_data = data;
mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
+ mp_preconfigure_entry(data);
- add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+ if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin))
+ goto out_data;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+ if (ret < 0)
+ goto out_pin;
- mp_preconfigure_entry(data);
mp_register_handler(virq, data->is_level);
local_irq_save(flags);
@@ -3044,6 +3030,12 @@ int mp_irqdomain_alloc(struct irq_domain
ioapic, mpc_ioapic_id(ioapic), pin, virq,
data->is_level, data->active_low);
return 0;
+
+out_pin:
+ __remove_pin_from_irq(data, ioapic, (int)irq_data->hwirq);
+out_data:
+ kfree(data);
+ return ret;
}
void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
^ permalink raw reply [flat|nested] 6+ messages in thread* [tip: x86/apic] x86/ioapic: Handle allocation failures gracefully
2024-07-29 16:55 ` Breno Leitao
2024-07-29 17:44 ` Thomas Gleixner
@ 2024-08-07 16:25 ` tip-bot2 for Thomas Gleixner
1 sibling, 0 replies; 6+ messages in thread
From: tip-bot2 for Thomas Gleixner @ 2024-08-07 16:25 UTC (permalink / raw)
To: linux-tip-commits
Cc: Breno Leitao, Thomas Gleixner, Qiuxu Zhuo, x86, linux-kernel
The following commit has been merged into the x86/apic branch of tip:
Commit-ID: 830802a0fea8fb39d3dc9fb7d6b5581e1343eb1f
Gitweb: https://git.kernel.org/tip/830802a0fea8fb39d3dc9fb7d6b5581e1343eb1f
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 02 Aug 2024 18:15:34 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 07 Aug 2024 18:13:27 +02:00
x86/ioapic: Handle allocation failures gracefully
Breno observed panics when using failslab under certain conditions during
runtime:
can not alloc irq_pin_list (-1,0,20)
Kernel panic - not syncing: IO-APIC: failed to add irq-pin. Can not proceed
panic+0x4e9/0x590
mp_irqdomain_alloc+0x9ab/0xa80
irq_domain_alloc_irqs_locked+0x25d/0x8d0
__irq_domain_alloc_irqs+0x80/0x110
mp_map_pin_to_irq+0x645/0x890
acpi_register_gsi_ioapic+0xe6/0x150
hpet_open+0x313/0x480
That's a pointless panic which is a leftover of the historic IO/APIC code
which panic'ed during early boot when the interrupt allocation failed.
The only place which might justify panic is the PIT/HPET timer_check() code
which tries to figure out whether the timer interrupt is delivered through
the IO/APIC. But that code does not require to handle interrupt allocation
failures. If the interrupt cannot be allocated then timer delivery fails
and it either panics due to that or falls back to legacy mode.
Cure this by removing the panic wrapper around __add_pin_to_irq_node() and
making mp_irqdomain_alloc() aware of the failure condition and handle it as
any other failure in this function gracefully.
Reported-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Breno Leitao <leitao@debian.org>
Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Link: https://lore.kernel.org/all/ZqfJmUF8sXIyuSHN@gmail.com
Link: https://lore.kernel.org/all/20240802155440.275200843@linutronix.de
---
arch/x86/kernel/apic/io_apic.c | 46 +++++++++++++++------------------
1 file changed, 22 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 477b740..d1ec1dc 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -352,27 +352,26 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static int __add_pin_to_irq_node(struct mp_chip_data *data,
- int node, int apic, int pin)
+static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
{
struct irq_pin_list *entry;
- /* don't allow duplicates */
- for_each_irq_pin(entry, data->irq_2_pin)
+ /* Don't allow duplicates */
+ for_each_irq_pin(entry, data->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
- return 0;
+ return true;
+ }
entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
if (!entry) {
- pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
- node, apic, pin);
- return -ENOMEM;
+ pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin);
+ return false;
}
+
entry->apic = apic;
entry->pin = pin;
list_add_tail(&entry->list, &data->irq_2_pin);
-
- return 0;
+ return true;
}
static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
@@ -387,13 +386,6 @@ static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
}
}
-static void add_pin_to_irq_node(struct mp_chip_data *data,
- int node, int apic, int pin)
-{
- if (__add_pin_to_irq_node(data, node, apic, pin))
- panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
-}
-
/*
* Reroute an IRQ to a different pin.
*/
@@ -1002,8 +994,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
if (irq_data && irq_data->parent_data) {
if (!mp_check_pin_attr(irq, info))
return -EBUSY;
- if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
- info->ioapic.pin))
+ if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
return -ENOMEM;
} else {
info->flags |= X86_IRQ_ALLOC_LEGACY;
@@ -3017,10 +3008,8 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
return -ENOMEM;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
- if (ret < 0) {
- kfree(data);
- return ret;
- }
+ if (ret < 0)
+ goto free_data;
INIT_LIST_HEAD(&data->irq_2_pin);
irq_data->hwirq = info->ioapic.pin;
@@ -3029,7 +3018,10 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
irq_data->chip_data = data;
mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
- add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+ if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin)) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
mp_preconfigure_entry(data);
mp_register_handler(virq, data->is_level);
@@ -3044,6 +3036,12 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
ioapic, mpc_ioapic_id(ioapic), pin, virq,
data->is_level, data->active_low);
return 0;
+
+free_irqs:
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+free_data:
+ kfree(data);
+ return ret;
}
void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
^ permalink raw reply related [flat|nested] 6+ messages in thread