kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: Marc Zyngier <maz@kernel.org>,
	Oliver Upton <oliver.upton@linux.dev>,
	 Joey Gouly <joey.gouly@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Zenghui Yu <yuzenghui@huawei.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sebastian Ott <sebott@redhat.com>,
	Andre Przywara <andre.przywara@arm.com>,
	Thorsten Blum <thorsten.blum@linux.dev>,
	Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>,
	linux-arm-kernel@lists.infradead.org,  kvmarm@lists.linux.dev,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [RFC PATCH 2/2] KVM: arm64: vgic-its: Unmap all vPEs on shutdown
Date: Mon, 23 Jun 2025 18:38:46 +0200	[thread overview]
Message-ID: <c142f447c59861f3c94b0fea7f055f4ff201fa98.camel@infradead.org> (raw)
In-Reply-To: <20250623132714.965474-2-dwmw2@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 4693 bytes --]

On Mon, 2025-06-23 at 14:27 +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> We observed systems going dark on kexec, due to corruption of the new
> kernel's text (and sometimes the initrd). This was eventually determined
> to be caused by the vLPI pending tables used by the GIC in the previous
> kernel, which were not being quiesced properly.

FWIW this is a previous hack we attempted which *didn't* work. (For
illustration only; ignore the syscore .kexec hook. We addressed that
differently in the end with
https://lore.kernel.org/kexec/20231213064004.2419447-1-jgowans@amazon.com/ )

At the point where the its_kexec() hook in this patch has completed, we
poisoned the (ex-) vLPI pending tables and then scanned for corruption
in them. We saw the same characteristic pattern of corruption which had
been breaking the next kernel after kexec: 32 bytes copied from offset
0 to offset 32 in a page, followed by bytes 0, 1, 32, 33, 34, 35 being
zeroed.

Adding a few milliseconds of sleep before the poisoning was enough to
make the problem go away. As is the patch which calls unmap_all_vpes()
for each kvm.

Of course, if the GIC were behind an IOMMU as all DMA-capable devices
should be, this might never have happened...

diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index f407cce9ecaa..a4fde376d214 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,12 @@ struct gic_quirk {
 	u32 mask;
 };
 
+struct redist_region {
+	void __iomem		*redist_base;
+	phys_addr_t		phys_base;
+	bool			single_redist;
+};
+
 int gic_configure_irq(unsigned int irq, unsigned int type,
                        void __iomem *base, void (*sync_access)(void));
 void gic_dist_config(void __iomem *base, int gic_irqs,
@@ -33,4 +39,6 @@ void gic_enable_of_quirks(const struct device_node *np,
 #define RDIST_FLAGS_RD_TABLES_PREALLOCATED     (1 << 1)
 #define RDIST_FLAGS_FORCE_NON_SHAREABLE        (1 << 2)
 
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *));
+
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 638f7eb033ad..d106b6ccca8b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4902,6 +4902,51 @@ static void its_enable_quirks(struct its_node *its)
 				     its_quirks, its);
 }
 
+static int disable_vpes(struct redist_region *region, void __iomem *ptr)
+{
+	u64 typer;
+	u64 val;
+
+	typer = gic_read_typer(ptr + GICR_TYPER);
+
+	if (!((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)))
+		return 1;
+
+	/* Deactivate any present vPE */
+	its_clear_vpend_valid(ptr + SZ_128K, 0, GICR_VPENDBASER_PendingLast);
+
+	/* Mark the VPE table as invalid */
+	val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER);
+	val &= ~GICR_VPROPBASER_4_1_VALID;
+	gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER);
+
+	/* Disable next redistributor */
+	return 1;
+}
+
+static int its_kexec(void)
+{
+	int err = 0, err_return = 0;
+	struct its_node *its;
+
+	raw_spin_lock(&its_lock);
+
+	list_for_each_entry(its, &its_nodes, entry) {
+		err = its_force_quiescent(its->base);
+		if (err) {
+			pr_err("ITS@%pa: failed to quiesce: %d\n",
+			       &its->phys_base, err);
+			err_return = -EBUSY;
+		}
+	}
+
+	gic_iterate_rdists(disable_vpes);
+
+	raw_spin_unlock(&its_lock);
+
+	return err_return;
+}
+
 static int its_save_disable(void)
 {
 	struct its_node *its;
@@ -5001,6 +5046,7 @@ static void its_restore_enable(void)
 static struct syscore_ops its_syscore_ops = {
 	.suspend = its_save_disable,
 	.resume = its_restore_enable,
+	.kexec = its_kexec,
 };
 
 static void __init __iomem *its_map_one(struct resource *res, int *err)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 50143de1791d..2014c5a75a6e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -46,12 +46,6 @@
 
 #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
 
-struct redist_region {
-	void __iomem		*redist_base;
-	phys_addr_t		phys_base;
-	bool			single_redist;
-};
-
 struct gic_chip_data {
 	struct fwnode_handle	*fwnode;
 	phys_addr_t		dist_phys_base;
@@ -968,7 +962,7 @@ static void __init gic_dist_init(void)
 		gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8);
 }
 
-static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
+int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
 {
 	int ret = -ENODEV;
 	int i;


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5069 bytes --]

  reply	other threads:[~2025-06-23 16:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-23 13:27 [RFC PATCH 1/2] KVM: Add arch hooks for KVM syscore ops David Woodhouse
2025-06-23 13:27 ` [RFC PATCH 2/2] KVM: arm64: vgic-its: Unmap all vPEs on shutdown David Woodhouse
2025-06-23 16:38   ` David Woodhouse [this message]
2025-07-22 10:35     ` David Woodhouse
2025-07-22 22:46   ` Oliver Upton
2025-07-23  9:42     ` David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c142f447c59861f3c94b0fea7f055f4ff201fa98.camel@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=andre.przywara@arm.com \
    --cc=catalin.marinas@arm.com \
    --cc=joey.gouly@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=pbonzini@redhat.com \
    --cc=sebott@redhat.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=suzuki.poulose@arm.com \
    --cc=thorsten.blum@linux.dev \
    --cc=will@kernel.org \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).