All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Breno Leitao <leitao@debian.org>
Cc: Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	leit@meta.com, "Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Wei Liu <wei.liu@kernel.org>, Marc Zyngier <maz@kernel.org>,
	Adrian Huang <ahuang12@lenovo.com>,
	"open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)"
	<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] x86/apic: Add retry mechanism to add_pin_to_irq_node()
Date: Mon, 29 Jul 2024 19:44:09 +0200	[thread overview]
Message-ID: <871q3c855i.ffs@tglx> (raw)
In-Reply-To: <ZqfJmUF8sXIyuSHN@gmail.com>

Breno!

On Mon, Jul 29 2024 at 09:55, Breno Leitao wrote:
>> > 	 panic
>> > 	   _printk
>> > 	   panic_smp_self_stop
>> > 	   rcu_is_watching
>> > 	   intel_irq_remapping_free
>> 
>> This completely lacks context. When does this happen? What's the system
>> state? What has intel_irq_remapping_free() to do with the allocation path?
>
> Sorry, let me clarify it a bit better:
>
> 1) This happens when the machine is booted up, and being under stress
> 2) This happens when I have failslab fault injection enabled.
> 3) The machine crashes after hitting this error.
> 4) This is reproducible with `stress-ng` using the `--aggressive` parameter
> 5) This is the full stack (sorry for not decoding the stack, but if you
>    need it, I am more than happy to give you a decoded stack)

Ok. That makes sense.

>> While it seems to make sense, the reality is that this is mostly early
>> boot code. If there is a real world memory allocation failure during
>> early boot then retries will not help at all.
>
> This is not happening at early boot, this is reproducible when running
> stress-ng in this aggressive mode.
>
> Since I have failslab injecting a kmalloc fault,
> __add_pin_to_irq_noder() returns -ENOMEM, which causes the undesired
> panic().

Fine. During runtime that allocation fail should not be fatal. It just
needs to be properly propagated.

>> > Introduce a retry mechanism that attempts to add the pin up to 3 times
>> > before giving up and panicking. This should improve the robustness of
>> > the IO-APIC code in the face of transient errors.
>> 
>> I'm absolutely not convinced by this loop heuristic. That's just a bad
>> hack.
>
> I will not disagree with you here, but I need to use this patch in order
> to be able t keep the system not panicking and stable while fault
> injecting slab errors and trying to reproduce a real bug in the network
> stack.

Something like the untested below should just work.

Thanks,

        tglx
---
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -352,27 +352,26 @@ static void ioapic_mask_entry(int apic,
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static int __add_pin_to_irq_node(struct mp_chip_data *data,
-				 int node, int apic, int pin)
+static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	/* don't allow duplicates */
-	for_each_irq_pin(entry, data->irq_2_pin)
+	for_each_irq_pin(entry, data->irq_2_pin) {
 		if (entry->apic == apic && entry->pin == pin)
-			return 0;
+			return true;
+	}
 
 	entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
 	if (!entry) {
-		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
-		       node, apic, pin);
-		return -ENOMEM;
+		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", node, apic, pin);
+		return false;
 	}
+
 	entry->apic = apic;
 	entry->pin = pin;
 	list_add_tail(&entry->list, &data->irq_2_pin);
-
-	return 0;
+	return true;
 }
 
 static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
@@ -387,35 +386,6 @@ static void __remove_pin_from_irq(struct
 		}
 }
 
-static void add_pin_to_irq_node(struct mp_chip_data *data,
-				int node, int apic, int pin)
-{
-	if (__add_pin_to_irq_node(data, node, apic, pin))
-		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
-					   int oldapic, int oldpin,
-					   int newapic, int newpin)
-{
-	struct irq_pin_list *entry;
-
-	for_each_irq_pin(entry, data->irq_2_pin) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-			/* every one is different, right? */
-			return;
-		}
-	}
-
-	/* old apic/pin didn't exist, so just add new ones */
-	add_pin_to_irq_node(data, node, newapic, newpin);
-}
-
 static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
 			       void (*final)(struct irq_pin_list *entry))
 {
@@ -1002,8 +972,7 @@ static int alloc_isa_irq_from_domain(str
 	if (irq_data && irq_data->parent_data) {
 		if (!mp_check_pin_attr(irq, info))
 			return -EBUSY;
-		if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
-					  info->ioapic.pin))
+		if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
 			return -ENOMEM;
 	} else {
 		info->flags |= X86_IRQ_ALLOC_LEGACY;
@@ -2131,10 +2100,10 @@ static int __init disable_timer_pin_setu
 }
 early_param("disable_timer_pin_1", disable_timer_pin_setup);
 
-static int mp_alloc_timer_irq(int ioapic, int pin)
+static int __init mp_alloc_timer_irq(int ioapic, int pin)
 {
-	int irq = -1;
 	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+	int irq = -1;
 
 	if (domain) {
 		struct irq_alloc_info info;
@@ -2150,6 +2119,24 @@ static int mp_alloc_timer_irq(int ioapic
 	return irq;
 }
 
+static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
+					   int oldapic, int oldpin,
+					   int newapic, int newpin)
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, data->irq_2_pin) {
+		if (entry->apic == oldapic && entry->pin == oldpin) {
+			entry->apic = newapic;
+			entry->pin = newpin;
+			return;
+		}
+	}
+
+	/* Old apic/pin didn't exist, so just add a new one */
+	add_pin_to_irq_node(data, node, newapic, newpin);
+}
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2996,9 +2983,9 @@ int mp_irqdomain_alloc(struct irq_domain
 		       unsigned int nr_irqs, void *arg)
 {
 	struct irq_alloc_info *info = arg;
+	int ret = -ENOMEM, ioapic, pin;
 	struct mp_chip_data *data;
 	struct irq_data *irq_data;
-	int ret, ioapic, pin;
 	unsigned long flags;
 
 	if (!info || nr_irqs > 1)
@@ -3016,22 +3003,21 @@ int mp_irqdomain_alloc(struct irq_domain
 	if (!data)
 		return -ENOMEM;
 
-	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
-	if (ret < 0) {
-		kfree(data);
-		return ret;
-	}
-
 	INIT_LIST_HEAD(&data->irq_2_pin);
 	irq_data->hwirq = info->ioapic.pin;
 	irq_data->chip = (domain->parent == x86_vector_domain) ?
 			  &ioapic_chip : &ioapic_ir_chip;
 	irq_data->chip_data = data;
 	mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
+	mp_preconfigure_entry(data);
 
-	add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+	if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin))
+		goto out_data;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+	if (ret < 0)
+		goto out_pin;
 
-	mp_preconfigure_entry(data);
 	mp_register_handler(virq, data->is_level);
 
 	local_irq_save(flags);
@@ -3044,6 +3030,12 @@ int mp_irqdomain_alloc(struct irq_domain
 		    ioapic, mpc_ioapic_id(ioapic), pin, virq,
 		    data->is_level, data->active_low);
 	return 0;
+
+out_pin:
+	__remove_pin_from_irq(data, ioapic, (int)irq_data->hwirq);
+out_data:
+	kfree(data);
+	return ret;
 }
 
 void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,

  reply	other threads:[~2024-07-29 17:44 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-29 14:06 [PATCH] x86/apic: Add retry mechanism to add_pin_to_irq_node() Breno Leitao
2024-07-29 16:13 ` Thomas Gleixner
2024-07-29 16:55   ` Breno Leitao
2024-07-29 17:44     ` Thomas Gleixner [this message]
2024-07-30 10:28       ` Breno Leitao
2024-08-07 16:25     ` [tip: x86/apic] x86/ioapic: Handle allocation failures gracefully tip-bot2 for Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=871q3c855i.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=ahuang12@lenovo.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=leit@meta.com \
    --cc=leitao@debian.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=wei.liu@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.