Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 09/18] KVM: arm64: vgic-v5: Limit support to 64 PPIs
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

Although we have some code supporting 128 PPIs, the only supported
configuration is 64 PPIs. There is no way to test the 128 PPI code,
so it is bound to bitrot very quickly.

Given that KVM/arm64's goal has always been to stick to non-IMPDEF
behaviours, drop the 128 PPI support. Someone motivated enough and
with very strong arguments can always bring it back -- it's all in
the git history.

Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/vgic-v5-sr.c       | 82 ++++++---------------------
 arch/arm64/kvm/sys_regs.c             | 17 +++---
 arch/arm64/kvm/vgic/vgic-kvm-device.c |  9 +--
 3 files changed, 26 insertions(+), 82 deletions(-)

diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c
index 47e6bcd437029..6d69dfe89a96c 100644
--- a/arch/arm64/kvm/hyp/vgic-v5-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c
@@ -30,10 +30,9 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
 {
 	/*
 	 * The following code assumes that the bitmap storage that we have for
-	 * PPIs is either 64 (architected PPIs, only) or 128 bits (architected &
-	 * impdef PPIs).
+	 * PPIs is exactly 64 bits (architected PPIs, only).
 	 */
-	BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
+	BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS != 64);
 
 	bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
 		     read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64);
@@ -49,22 +48,6 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
 	cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
 	cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
 
-	if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
-		bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
-			     read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64);
-		bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr,
-			     read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64);
-
-		cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
-		cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
-		cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
-		cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
-		cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
-		cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
-		cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
-		cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
-	}
-
 	/* Now that we are done, disable DVI */
 	write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
 	write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
@@ -74,9 +57,6 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
 {
 	DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
 
-	/* We assume 64 or 128 PPIs - see above comment */
-	BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
-
 	/* Enable DVI so that the guest's interrupt config takes over */
 	write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64),
 		       SYS_ICH_PPI_DVIR0_EL2);
@@ -108,50 +88,20 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
 	write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
 		       SYS_ICH_PPI_PRIORITYR7_EL2);
 
-	if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
-		/* Enable DVI so that the guest's interrupt config takes over */
-		write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64),
-			       SYS_ICH_PPI_DVIR1_EL2);
-
-		write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64),
-			       SYS_ICH_PPI_ACTIVER1_EL2);
-		write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64),
-			       SYS_ICH_PPI_ENABLER1_EL2);
-		write_sysreg_s(bitmap_read(pendr, 64, 64),
-			       SYS_ICH_PPI_PENDR1_EL2);
-
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
-			       SYS_ICH_PPI_PRIORITYR8_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
-			       SYS_ICH_PPI_PRIORITYR9_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
-			       SYS_ICH_PPI_PRIORITYR10_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
-			       SYS_ICH_PPI_PRIORITYR11_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
-			       SYS_ICH_PPI_PRIORITYR12_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
-			       SYS_ICH_PPI_PRIORITYR13_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
-			       SYS_ICH_PPI_PRIORITYR14_EL2);
-		write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
-			       SYS_ICH_PPI_PRIORITYR15_EL2);
-	} else {
-		write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
-
-		write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
-
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
-		write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
-	}
+	write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
+
+	write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
+
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
+	write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
 }
 
 void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 513f5f1429b5f..6083a1b23dbf9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -724,6 +724,7 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
 {
 	unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask;
 	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	unsigned long reg = p->regval;
 	int i;
 
 	/* We never expect to get here with a read! */
@@ -731,21 +732,17 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
 		return undef_access(vcpu, p, r);
 
 	/*
-	 * If we're only handling architected PPIs and the guest writes to the
-	 * enable for the non-architected PPIs, we just return as there's
-	 * nothing to do at all. We don't even allocate the storage for them in
-	 * this case.
+	 * As we're only handling architected PPIs, guest writes to the
+	 * enable for the non-architected PPIs are simply ignored, as there's
+	 * nothing to do at all. We don't even allocate the storage for them.
 	 */
-	if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2)
+	if (p->Op2 % 2)
 		return true;
 
 	/*
-	 * Merge the raw guest write into out bitmap at an offset of either 0 or
-	 * 64, then and it with our PPI mask.
+	 * Merge the raw guest write into our bitmap, anded with our PPI mask.
 	 */
-	bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64);
-	bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask,
-		   VGIC_V5_NR_PRIVATE_IRQS);
+	bitmap_and(cpu_if->vgic_ppi_enabler, &reg, mask, VGIC_V5_NR_PRIVATE_IRQS);
 
 	/*
 	 * Sync the change in enable states to the vgic_irqs. We consider all
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index a96c77dccf353..90be99443df3b 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -730,18 +730,15 @@ static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
 	guard(mutex)(&dev->kvm->arch.config_lock);
 
 	/*
-	 * We either support 64 or 128 PPIs. In the former case, we need to
-	 * return 0s for the second 64 bits as we have no storage backing those.
+	 * We only support 64 PPIs, so we need to return 0s for the
+	 * second 64 bits as we have no storage backing those.
 	 */
 	ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr);
 	if (ret)
 		return ret;
 	uaddr++;
 
-	if (VGIC_V5_NR_PRIVATE_IRQS == 128)
-		ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr);
-	else
-		ret = put_user(0, uaddr);
+	ret = put_user(0, uaddr);
 
 	return ret;
 }
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 08/18] KVM: arm64: vgic: Rationalise per-CPU irq accessor
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

Despite adding the necessary infrastructure to identify irq types,
vgic_get_vcpu_irq() treats GICv5 PPIs in a special way, which
impairs the readability of the code.

Use the existing irq classifiers to handle per-CPU irqs for all
vgic types, and let the normal control flow reach global interrupt
handling without any v5-specific path.

Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 3ac6d49bc4876..b697678d68b01 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -106,24 +106,23 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
 
 struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
 {
+	enum kvm_device_type type;
+
 	if (WARN_ON(!vcpu))
 		return NULL;
 
-	if (vgic_is_v5(vcpu->kvm)) {
-		u32 int_num, hwirq_id;
-
-		if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
-			return NULL;
-
-		hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
-		int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
+	type = vcpu->kvm->arch.vgic.vgic_model;
 
-		return &vcpu->arch.vgic_cpu.private_irqs[int_num];
-	}
+	if (__irq_is_sgi(type, intid) || __irq_is_ppi(type, intid)) {
+		switch (type) {
+		case KVM_DEV_TYPE_ARM_VGIC_V5:
+			intid = vgic_v5_get_hwirq_id(intid);
+			intid = array_index_nospec(intid, VGIC_V5_NR_PRIVATE_IRQS);
+			break;
+		default:
+			intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
+		}
 
-	/* SGIs and PPIs */
-	if (intid < VGIC_NR_PRIVATE_IRQS) {
-		intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
 		return &vcpu->arch.vgic_cpu.private_irqs[intid];
 	}
 
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 03/18] KVM: arm64: vgic-v5: Remove use of __assign_bit() with a constant
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

Using __assign_bit() is very useful when the value of the bit is
not known at compile time. In all other cases, __set_bit() and
__clear_bit() are the correct tool for the job.

This also fixes an odd case of using VGIC_V5_NR_PRIVATE_IRQS as
the bit value...

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v5.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 7c146fccc9689..4d62b1c31fe8b 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -25,13 +25,13 @@ static void vgic_v5_get_implemented_ppis(void)
 	 * If we have KVM, we have EL2, which means that we have support for the
 	 * EL1 and EL2 Physical & Virtual timers.
 	 */
-	__assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
-	__assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
-	__assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
-	__assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
+	__set_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask);
+	__set_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask);
+	__set_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask);
+	__set_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask);
 
 	/* The SW_PPI should be available */
-	__assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
+	__set_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask);
 
 	/* The PMUIRQ is available if we have the PMU */
 	__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
@@ -146,9 +146,7 @@ int vgic_v5_init(struct kvm *kvm)
 	/* We only allow userspace to drive the SW_PPI, if it is implemented. */
 	bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis,
 		    VGIC_V5_NR_PRIVATE_IRQS);
-	__assign_bit(GICV5_ARCH_PPI_SW_PPI,
-		     kvm->arch.vgic.gicv5_vm.userspace_ppis,
-		     VGIC_V5_NR_PRIVATE_IRQS);
+	__set_bit(GICV5_ARCH_PPI_SW_PPI, kvm->arch.vgic.gicv5_vm.userspace_ppis);
 	bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis,
 		   kvm->arch.vgic.gicv5_vm.userspace_ppis,
 		   ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
@@ -197,7 +195,7 @@ int vgic_v5_finalize_ppi_state(struct kvm *kvm)
 		/* Expose PPIs with an owner or the SW_PPI, only */
 		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
 			if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) {
-				__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1);
+				__set_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask);
 				__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr,
 					     irq->config == VGIC_CONFIG_LEVEL);
 			}
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 04/18] KVM: arm64: vgic-v5: Drop pointless ARM64_HAS_GICV5_CPUIF check
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

vgic_v5_get_implemented_ppis() can only be called when we have
a GICv5, by construction.

Remove the pointless check against ARM64_HAS_GICV5_CPUIF.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v5.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 4d62b1c31fe8b..0101ec3f55283 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -18,9 +18,6 @@
  */
 static void vgic_v5_get_implemented_ppis(void)
 {
-	if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
-		return;
-
 	/*
 	 * If we have KVM, we have EL2, which means that we have support for the
 	 * EL1 and EL2 Physical & Virtual timers.
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 02/18] KVM: arm64: vgic-v5: Move PPI caps into kvm_vgic_global_state
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

Constant vgic properties are usually kept in kvm_vgic_global_state,
but the vgic-v5 code does its own thing.

Move the ppi_caps data into the global structure, which has the
modest additional advantage of making it ro_after_init.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v5.c |  2 +-
 include/kvm/arm_vgic.h        | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index c0d36658ffe74..7c146fccc9689 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -10,7 +10,7 @@
 
 #include "vgic.h"
 
-static struct vgic_v5_ppi_caps ppi_caps;
+#define ppi_caps	kvm_vgic_global_state.vgic_v5_ppi_caps
 
 /*
  * Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 1388dc6028a9a..ea793479ab254 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -177,6 +177,11 @@ struct vgic_global {
 	bool			has_gcie_v3_compat;
 
 	u32			ich_vtr_el2;
+
+	/* GICv5 PPI capabilities */
+	struct {
+		DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+	} vgic_v5_ppi_caps;
 };
 
 extern struct vgic_global kvm_vgic_global_state;
@@ -492,11 +497,6 @@ struct vgic_v5_cpu_if {
 	struct gicv5_vpe gicv5_vpe;
 };
 
-/* What PPI capabilities does a GICv5 host have */
-struct vgic_v5_ppi_caps {
-	DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
-};
-
 struct vgic_cpu {
 	/* CPU vif control registers for world switch */
 	union {
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 15/18] Documentation: KVM: Fix typos in VGICv5 documentation
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

From: Sascha Bischoff <sascha.bischoff@arm.com>

Fix two typos in the VGICv5 documentation.

Fixes: d51c978b7d3e ("KVM: arm64: gic-v5: Communicate userspace-driveable PPIs via a UAPI")
Fixes: eb3c4d2c9a4d ("Documentation: KVM: Introduce documentation for VGICv5")
Link: https://sashiko.dev/#/patchset/20260319154937.3619520-1-sascha.bischoff%40arm.com
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 Documentation/virt/kvm/devices/arm-vgic-v5.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index 29335ea823fc5..70b9162755c7e 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -12,8 +12,8 @@ Only one VGIC instance may be instantiated through this API.  The created VGIC
 will act as the VM interrupt controller, requiring emulated user-space devices
 to inject interrupts to the VGIC instead of directly to CPUs.
 
-Creating a guest GICv5 device requires a host GICv5 host.  The current VGICv5
-device only supports PPI interrupts.  These can either be injected from emulated
+Creating a guest GICv5 device requires a GICv5 host.  The current VGICv5 device
+only supports PPI interrupts.  These can either be injected from emulated
 in-kernel devices (such as the Arch Timer, or PMU), or via the KVM_IRQ_LINE
 ioctl.
 
@@ -25,7 +25,7 @@ Groups:
       request the initialization of the VGIC, no additional parameter in
       kvm_device_attr.addr. Must be called after all VCPUs have been created.
 
-   KVM_DEV_ARM_VGIC_USERPSPACE_PPIs
+   KVM_DEV_ARM_VGIC_USERSPACE_PPIS
       request the mask of userspace-drivable PPIs. Only a subset of the PPIs can
       be directly driven from userspace with GICv5, and the returned mask
       informs userspace of which it is allowed to drive via KVM_IRQ_LINE.
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 11/18] KVM: arm64: vgic-v5: Atomically assign bits to PPI DVI bitmap
From: Marc Zyngier @ 2026-05-20  9:19 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Sascha Bischoff
In-Reply-To: <20260520091949.542365-1-maz@kernel.org>

From: Sascha Bischoff <sascha.bischoff@arm.com>

For GICv5 guests we make use of the DVI mechanism for PPIs where
possible.  When mapping a virtual irq to a physical one for a GICv5
guest, the corresponding bit in the DVI bitmap is set. When unmapping,
said bit is cleared again. The key user of this mechanism is the arch
timer.

The existing code used the non-atomic __assign_bit() rather than doing
the update atomically. This could technically result in losing state
if a second PPI's DVI bit were being manipulated concurrently. Each
individual bit within the DVI bitmap is guarded using
vgic_irq->irq_lock, but there's no locking for the overall
bitmap. Therefore, switch to using the atomic assign_bit() function
instead.

Fixes: 5a98d0e17e59 ("KVM: arm64: gic-v5: Implement direct injection of PPIs")
Link: https://sashiko.dev/#/patchset/20260319154937.3619520-1-sascha.bischoff%40arm.com
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 7916bd8d564ef..d4789ff3e7402 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -272,7 +272,7 @@ void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
 	lockdep_assert_held(&irq->irq_lock);

 	ppi = vgic_v5_get_hwirq_id(irq->intid);
-	__assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
+	assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
 }

 static const struct irq_ops vgic_v5_ppi_irq_ops = {
-- 
2.47.3

^ permalink raw reply related

* [PATCH v2 6/8] arm64: kdump: exclude non-dumpable reserved memory regions from vmcore
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

Reserved memory regions are excluded from vmcore by default unless
marked dumpable. Honor the dumpable flag to filter out device firmware
regions (e.g., GPU, DSP, modem) reserved via device tree, since they
typically contain data not useful for kernel crash analysis and can
significantly increase vmcore size.

Use of_reserved_mem_kdump_exclude() to perform the exclusion, and
pre-size the crash_mem array via of_reserved_mem_kdump_nr_ranges().

Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
Tested-by: Meijing Zhao <zhaomeijing@lixiang.com>
---
 arch/arm64/kernel/machine_kexec_file.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index e31fabed378a..1d65320c6ba4 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -17,6 +17,7 @@
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/types.h>
@@ -51,6 +52,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 	nr_ranges = 2; /* for exclusion of crashkernel region */
 	for_each_mem_range(i, &start, &end)
 		nr_ranges++;
+	nr_ranges += of_reserved_mem_kdump_nr_ranges();
 
 	cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
 	if (!cmem)
@@ -75,6 +77,10 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 			goto out;
 	}
 
+	ret = of_reserved_mem_kdump_exclude(cmem);
+	if (ret)
+		goto out;
+
 	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
 
 out:
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 5/8] of: reserved_mem: add kdump helpers to exclude non-dumpable regions
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

Add two helpers to exclude non-dumpable regions for arch-specific
code.

 - of_reserved_mem_kdump_nr_ranges() returns the count of regions
   that are not dumpable. Each excluded region may split an existing
   crash_mem range into two, so callers use this to calculate
   crash_mem allocation size.

 - of_reserved_mem_kdump_exclude() walks reserved_mem[] and calls
   crash_exclude_mem_range() for every non-dumpable region.

Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
Tested-by: Meijing Zhao <zhaomeijing@lixiang.com>
---
 drivers/of/of_reserved_mem.c    | 34 +++++++++++++++++++++++++++++++++
 include/linux/of_reserved_mem.h | 14 ++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 6dfe9e03c535..ef9732865783 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/memblock.h>
 #include <linux/kmemleak.h>
+#include <linux/crash_core.h>
 
 #include "of_private.h"
 
@@ -851,6 +852,39 @@ struct reserved_mem *of_reserved_mem_lookup(struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_reserved_mem_lookup);
 
+/*
+ * Count non-dumpable reserved regions. Excluding each one may split a
+ * crash_mem range in two; callers use this to size the allocation.
+ */
+unsigned int of_reserved_mem_kdump_nr_ranges(void)
+{
+	unsigned int i, n = 0;
+
+	for (i = 0; i < reserved_mem_count; i++)
+		if (reserved_mem[i].size && !reserved_mem[i].dumpable)
+			n++;
+	return n;
+}
+
+/* Exclude non-dumpable reserved regions from @cmem. */
+int of_reserved_mem_kdump_exclude(struct crash_mem *cmem)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < reserved_mem_count; i++) {
+		struct reserved_mem *r = &reserved_mem[i];
+
+		if (!r->size || r->dumpable)
+			continue;
+		ret = crash_exclude_mem_range(cmem, r->base,
+					      r->base + r->size - 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /**
  * of_reserved_mem_region_to_resource() - Get a reserved memory region as a resource
  * @np:		node containing 'memory-region' property
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 55a67cee41ea..70db99f1fbff 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -8,6 +8,7 @@
 struct of_phandle_args;
 struct reserved_mem_ops;
 struct resource;
+struct crash_mem;
 
 struct reserved_mem {
 	const char			*name;
@@ -48,6 +49,9 @@ int of_reserved_mem_region_to_resource_byname(const struct device_node *np,
 					      const char *name, struct resource *res);
 int of_reserved_mem_region_count(const struct device_node *np);
 
+unsigned int of_reserved_mem_kdump_nr_ranges(void);
+int of_reserved_mem_kdump_exclude(struct crash_mem *cmem);
+
 #else
 
 #define RESERVEDMEM_OF_DECLARE(name, compat, ops)			\
@@ -92,6 +96,16 @@ static inline int of_reserved_mem_region_count(const struct device_node *np)
 {
 	return 0;
 }
+
+static inline unsigned int of_reserved_mem_kdump_nr_ranges(void)
+{
+	return 0;
+}
+
+static inline int of_reserved_mem_kdump_exclude(struct crash_mem *cmem)
+{
+	return 0;
+}
 #endif
 
 /**
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 4/8] of: reserved_mem: save /memreserve/ entries into the reserved_mem array
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

/memreserve/ is used by firmware or bootloaders. Such regions hold no
useful data for crash analysis and should be excluded from the
kdump vmcore, so save /memreserve/ entries into the reserved_mem array
for later exclusion.

If a /memreserve/ entry overlaps any dumpable reserved region, mark
the whole memreserve entry dumpable as well. This may keep slightly
more memory in vmcore than strictly necessary, but avoids splitting
entries and never drops data that may be useful for crash analysis.

Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
Tested-by: Meijing Zhao <zhaomeijing@lixiang.com>
---
 drivers/of/fdt.c             |  5 ++++
 drivers/of/of_private.h      |  2 ++
 drivers/of/of_reserved_mem.c | 55 ++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 82f7327c59ea..d2bcaf149fe8 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -499,6 +499,7 @@ void __init early_init_fdt_scan_reserved_mem(void)
 	int n;
 	int res;
 	u64 base, size;
+	int nr_memreserve = 0;
 
 	if (!initial_boot_params)
 		return;
@@ -516,7 +517,9 @@ void __init early_init_fdt_scan_reserved_mem(void)
 		if (!size)
 			break;
 		memblock_reserve(base, size);
+		nr_memreserve++;
 	}
+	fdt_reserved_mem_account_memreserve(nr_memreserve);
 }
 
 /**
@@ -1287,6 +1290,8 @@ void __init unflatten_device_tree(void)
 	/* Save the statically-placed regions in the reserved_mem array */
 	fdt_scan_reserved_mem_late();
 
+	fdt_reserved_mem_save_memreserve_entries();
+
 	/* Populate an empty root node when bootloader doesn't provide one */
 	if (!fdt) {
 		fdt = (void *) __dtb_empty_root_begin;
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 0ae16da066e2..646b5f43ad47 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -187,6 +187,8 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node *
 
 int fdt_scan_reserved_mem(void);
 void __init fdt_scan_reserved_mem_late(void);
+void __init fdt_reserved_mem_account_memreserve(int n);
+void __init fdt_reserved_mem_save_memreserve_entries(void);
 
 bool of_fdt_device_is_available(const void *blob, unsigned long node);
 
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 313cbc57aa45..6dfe9e03c535 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -241,6 +241,43 @@ static void __init __rmem_check_for_overlap(void)
 	}
 }
 
+static void __init fdt_reserved_mem_add_memreserve(phys_addr_t base,
+						   phys_addr_t size)
+{
+	struct reserved_mem *rmem;
+	bool dumpable = false;
+	int i;
+
+	if (reserved_mem_count == total_reserved_mem_cnt) {
+		pr_err("not enough space for memreserve regions.\n");
+		return;
+	}
+
+	for (i = 0; i < reserved_mem_count; i++) {
+		rmem = &reserved_mem[i];
+
+		if (!rmem->dumpable)
+			continue;
+
+		if (base < rmem->base + rmem->size && rmem->base < base + size) {
+			dumpable = true;
+			break;
+		}
+	}
+
+	rmem = &reserved_mem[reserved_mem_count];
+	rmem->base = base;
+	rmem->size = size;
+	rmem->dumpable = dumpable;
+
+	reserved_mem_count++;
+}
+
+void __init fdt_reserved_mem_account_memreserve(int n)
+{
+	total_reserved_mem_cnt += n;
+}
+
 /**
  * fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved
  * memory regions.
@@ -301,6 +338,24 @@ void __init fdt_scan_reserved_mem_late(void)
 	__rmem_check_for_overlap();
 }
 
+void __init fdt_reserved_mem_save_memreserve_entries(void)
+{
+	const void *fdt = initial_boot_params;
+	u64 base, size;
+	int n;
+
+	if (!fdt)
+		return;
+
+	for (n = 0; ; n++) {
+		if (fdt_get_mem_rsv(fdt, n, &base, &size))
+			break;
+		if (!size)
+			break;
+		fdt_reserved_mem_add_memreserve(base, size);
+	}
+}
+
 static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname);
 
 /*
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 3/8] of: reserved_mem: add dumpable flag to opt-in vmcore
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

Add a 'dumpable' flag to struct reserved_mem so the kernel can decide
whether a reserved area should be included in the kdump vmcore. Most
reserved regions are owned by devices and do not contain data useful
for kernel crash analysis, so excluding them by default is the right
behaviour.

Reusable CMA regions are different: pages in a CMA region are handed
back to the buddy allocator and may contain key data for crash
analysis, so set dumpable to true in rmem_cma_setup().

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
Tested-by: Meijing Zhao <zhaomeijing@lixiang.com>
Link: https://lore.kernel.org/all/20260506144542.GA2072596-robh@kernel.org/
---
 include/linux/of_reserved_mem.h | 1 +
 kernel/dma/contiguous.c         | 1 +
 2 files changed, 2 insertions(+)

diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index e8b20b29fa68..55a67cee41ea 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -15,6 +15,7 @@ struct reserved_mem {
 	phys_addr_t			base;
 	phys_addr_t			size;
 	void				*priv;
+	bool				dumpable;
 };
 
 struct reserved_mem_ops {
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 03f52bd17120..eddec89eb414 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -579,6 +579,7 @@ static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
 		dma_contiguous_default_area = cma;
 
 	rmem->priv = cma;
+	rmem->dumpable = true;
 
 	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 2/8] kexec/crash: provide crash_exclude_mem_range() stub when CONFIG_CRASH_DUMP=n
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

Prepare for an upcoming change that excludes non-dumpable reserved
regions from the kdump vmcore and will call crash_exclude_mem_range()
from generic, non-arch code.

No functional change.

Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
---
 include/linux/crash_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index c1dee3f971a9..0033d4777648 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -87,6 +87,12 @@ static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 static inline int kexec_crash_loaded(void) { return 0; }
 static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {};
 static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; };
+static inline int crash_exclude_mem_range(struct crash_mem *mem,
+					  unsigned long long mstart,
+					  unsigned long long mend)
+{
+	return 0;
+}
 #endif /* CONFIG_CRASH_DUMP*/
 
 #ifdef CONFIG_CRASH_DM_CRYPT
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 1/8] of: reserved_mem: handle NULL name in of_reserved_mem_lookup()
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai
In-Reply-To: <20260520091844.592753-1-chenwandun@lixiang.com>

From: Wandun Chen <chenwandun1@gmail.com>

From: Wandun Chen <chenwandun@lixiang.com>

Prepare for an upcoming change that appends /memreserve/ entries to
reserved_mem[]; such entries have no name.

No functional change.

Signed-off-by: Wandun Chen <chenwandun@lixiang.com>
---
 drivers/of/of_reserved_mem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 8d5777cb5d1b..313cbc57aa45 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -788,7 +788,8 @@ struct reserved_mem *of_reserved_mem_lookup(struct device_node *np)
 
 	name = kbasename(np->full_name);
 	for (i = 0; i < reserved_mem_count; i++)
-		if (!strcmp(reserved_mem[i].name, name))
+		if (reserved_mem[i].name &&
+		    !strcmp(reserved_mem[i].name, name))
 			return &reserved_mem[i];
 
 	return NULL;
-- 
2.43.0



^ permalink raw reply related

* [PATCH v2 0/8] kdump: reduce vmcore size and capture time
From: Wandun Chen @ 2026-05-20  9:18 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, loongarch, linux-riscv,
	devicetree, kexec, iommu, zhaomeijing
  Cc: catalin.marinas, will, chenhuacai, kernel, pjw, palmer, aou, alex,
	robh, saravanak, akpm, bhe, rppt, pasha.tatashin, pratyush,
	ruirui.yang, m.szyprowski, robin.murphy, leitao, kees, coxu,
	tangyouling, songshuaishuai

This is v2 of the vmcore size optimization series.

The original v1 [1] contains two parts of work:
 - Bug fixes and small cleanups about reserved memory.
 - A vmcore size optimization that excludes reserved memory from the
   vmcore.

For the convenience of review, I have split it into two independent
patchsets. This patchset focuses on the vmcore size optimization.

Motivation
==========

On SoCs that carve out large firmware-owned reserved memory (GPU
firmware, DSP, modem, camera ISP, NPU, ...), kdump currently dumps
those carveouts as part of system RAM even though their contents are
firmware state that is not useful for kernel crash analysis.

This series excludes /reserved-memory regions and /memreserve/ firmware
regions from the vmcore by default. As a result, kdump capture time is
reduced and the vmcore becomes smaller.

v1 --> v2:
1. v1 added an opt-out DT property ('linux,no-dump'). Per Rob's
   feedback [2], v2 drops that property and excludes reserved memory
   by default.
2. Split some prepared patches from the original patches.
3. Address coding-style comments on patch 5 from Rob.

[1] https://lore.kernel.org/lkml/20260429065831.1510858-1-chenwandun@lixiang.com/
[2] https://lore.kernel.org/lkml/20260506144542.GA2072596-robh@kernel.org/

Wandun Chen (8):
  of: reserved_mem: handle NULL name in of_reserved_mem_lookup()
  kexec/crash: provide crash_exclude_mem_range() stub when
    CONFIG_CRASH_DUMP=n
  of: reserved_mem: add dumpable flag to opt-in vmcore
  of: reserved_mem: save /memreserve/ entries into the reserved_mem
    array
  of: reserved_mem: add kdump helpers to exclude non-dumpable regions
  arm64: kdump: exclude non-dumpable reserved memory regions from vmcore
  riscv: kdump: exclude non-dumpable reserved memory regions from vmcore
  loongarch: kdump: exclude non-dumpable reserved memory regions from
    vmcore

 arch/arm64/kernel/machine_kexec_file.c     |  6 ++
 arch/loongarch/kernel/machine_kexec_file.c |  6 ++
 arch/riscv/kernel/machine_kexec_file.c     |  4 +
 drivers/of/fdt.c                           |  5 ++
 drivers/of/of_private.h                    |  2 +
 drivers/of/of_reserved_mem.c               | 92 +++++++++++++++++++++-
 include/linux/crash_core.h                 |  6 ++
 include/linux/of_reserved_mem.h            | 15 ++++
 kernel/dma/contiguous.c                    |  1 +
 9 files changed, 136 insertions(+), 1 deletion(-)

-- 
2.43.0

^ permalink raw reply

* Re: [PATCH v7 16/23] drm: bridge: dw_hdmi: Update EDID and CEC phys addr in bridge detect()
From: Neil Armstrong @ 2026-05-20  9:17 UTC (permalink / raw)
  To: Jonas Karlman, Andrzej Hajda, Robert Foss, Heiko Stuebner,
	Laurent Pinchart, Jernej Skrabec, Luca Ceresoli,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
	Simona Vetter
  Cc: Liu Ying, Sandy Huang, Andy Yan, Chen-Yu Tsai, Christian Hewitt,
	Diederik de Haas, Nicolas Frattaroli, Dmitry Baryshkov, dri-devel,
	linux-arm-kernel, linux-rockchip, linux-amlogic, linux-sunxi, imx,
	linux-kernel
In-Reply-To: <20260518180206.2480119-17-jonas@kwiboo.se>

On 5/18/26 20:01, Jonas Karlman wrote:
> Update EDID and CEC phys addr in the bridge detect() func to closely
> match the behavior of a bridge connector with a HDMI bridge attached
> and the dw-hdmi connector.
> 
> This change introduces a slight delay to the bridge connector detect()
> and get_modes() funcs due to multiple EDID reads. This is an acceptable
> added delay to help ensure EDID and CEC phys addr always are correct.
> 
> Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
> ---
> v7: Update commit message
> v6: New patch
> 
> This is a temporary change until dw-hdmi is fully converted into a
> HDMI bridge in a future part of this multi-series effort.
> 
> The patch "drm/bridge-connector: Use cached connector status in
> .get_modes()" [1] can help remove one unnecessary EDID read until
> dw-hdmi is fully converted into a HDMI bridge.
> 
> [1] https://lore.kernel.org/dri-devel/20260426-dw-hdmi-qp-scramb-v5-3-d778e70c317b@collabora.com/
> ---
>   drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 11 ++++++++++-
>   1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> index 37406555af7b..0c4388e7aa5e 100644
> --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> @@ -2947,8 +2947,17 @@ static enum drm_connector_status
>   dw_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector)
>   {
>   	struct dw_hdmi *hdmi = bridge->driver_private;
> +	enum drm_connector_status status;
>   
> -	return dw_hdmi_detect(hdmi);
> +	status = dw_hdmi_detect(hdmi);
> +
> +	/*
> +	 * Update EDID and CEC phys addr to match the behavior of a bridge
> +	 * connector with a HDMI bridge attached and the dw-hdmi connector.
> +	 */
> +	dw_hdmi_connector_status_update(hdmi, connector, status);
> +
> +	return status;
>   }
>   
>   static const struct drm_edid *dw_hdmi_bridge_edid_read(struct drm_bridge *bridge,

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>

Thanks,
Neil


^ permalink raw reply

* [PATCH v2] i2c: imx-lpi2c: reset controller in probe stage
From: Carlos Song (OSS) @ 2026-05-20  9:16 UTC (permalink / raw)
  To: aisheng.dong, andi.shyti, Frank.Li, s.hauer, kernel, festevam
  Cc: linux-i2c, imx, linux-arm-kernel, linux-kernel, Carlos Song

From: Carlos Song <carlos.song@nxp.com>

Reset the I2C controller in the probe stage so that unexpected LPI2C
controller state left over from previous stages cannot hang system boot.

Per the LPI2C reference manual, section 7.1.4 "Controller Control (MCR)",
the RST bit (bit 1) description states:

  "The reset takes effect immediately and remains asserted until negated
  by software. There is no minimum delay required before clearing the
  software reset."

Therefore, it is safe to write 0 to MCR immediately after asserting the
RST bit without any additional delay.

Signed-off-by: Carlos Song <carlos.song@nxp.com>
---
Change for v2:
  - Jump to rpm_disable instead of returning directly if the IRQ request
    fails
---
 drivers/i2c/busses/i2c-imx-lpi2c.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 01ee38131ef2..6e298424de5e 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -1499,11 +1499,6 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
 	if (ret)
 		lpi2c_imx->bitrate = I2C_MAX_STANDARD_MODE_FREQ;
 
-	ret = devm_request_irq(&pdev->dev, lpi2c_imx->irq, lpi2c_imx_isr, IRQF_NO_SUSPEND,
-			       pdev->name, lpi2c_imx);
-	if (ret)
-		return dev_err_probe(&pdev->dev, ret, "can't claim irq %d\n", lpi2c_imx->irq);
-
 	i2c_set_adapdata(&lpi2c_imx->adapter, lpi2c_imx);
 	platform_set_drvdata(pdev, lpi2c_imx);
 
@@ -1535,10 +1530,25 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
+	/*
+	 * Reset all internal controller logic and registers to avoid effects of previous status
+	 * The reset takes effect immediately and there is no minimum delay required before
+	 * clearing the software reset.
+	 */
+	writel(MCR_RST, lpi2c_imx->base + LPI2C_MCR);
+	writel(0, lpi2c_imx->base + LPI2C_MCR);
+
 	temp = readl(lpi2c_imx->base + LPI2C_PARAM);
 	lpi2c_imx->txfifosize = 1 << (temp & 0x0f);
 	lpi2c_imx->rxfifosize = 1 << ((temp >> 8) & 0x0f);
 
+	ret = devm_request_irq(&pdev->dev, lpi2c_imx->irq, lpi2c_imx_isr, IRQF_NO_SUSPEND,
+			       pdev->name, lpi2c_imx);
+	if (ret) {
+		dev_err_probe(&pdev->dev, ret, "can't claim irq %d\n", lpi2c_imx->irq);
+		goto rpm_disable;
+	}
+
 	/* Init optional bus recovery function */
 	ret = lpi2c_imx_init_recovery_info(lpi2c_imx, pdev);
 	/* Give it another chance if pinctrl used is not ready yet */
-- 
2.43.0



^ permalink raw reply related

* Re: [PATCH v2 6/7] mm/vmalloc: align vm_area so vmap() can batch mappings
From: Uladzislau Rezki @ 2026-05-20  9:12 UTC (permalink / raw)
  To: Barry Song
  Cc: Uladzislau Rezki, Wen Jiang, linux-mm, linux-arm-kernel,
	catalin.marinas, will, akpm, Xueyuan.chen21, dev.jain, rppt,
	david, ryan.roberts, anshuman.khandual, ajd, linux-kernel,
	Wen Jiang
In-Reply-To: <CAGsJ_4zjQDOXToau0xoWOTnoP9kODxRdzSeNX6n_b3wxguC4mw@mail.gmail.com>

On Wed, May 20, 2026 at 03:55:02PM +0800, Barry Song wrote:
> On Wed, May 20, 2026 at 3:37 PM Uladzislau Rezki <urezki@gmail.com> wrote:
> >
> > On Thu, May 14, 2026 at 05:41:07PM +0800, Wen Jiang wrote:
> > > From: "Barry Song (Xiaomi)" <baohua@kernel.org>
> > >
> > > Try to align the vmap virtual address to PMD_SHIFT or a
> > > larger PTE mapping size hinted by the architecture, so
> > > contiguous pages can be batch-mapped when setting PMD or
> > > PTE entries.
> > >
> > > Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
> > > Signed-off-by: Wen Jiang <jiangwen6@xiaomi.com>
> > > Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com>
> > > ---
> > >  mm/vmalloc.c | 31 ++++++++++++++++++++++++++++++-
> > >  1 file changed, 30 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > index c30a7673e..b3389c8f1 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -3591,6 +3591,35 @@ static int __vmap_huge(unsigned long addr, unsigned long end,
> > >       return err;
> > >  }
> > >
> > > +static struct vm_struct *get_aligned_vm_area(unsigned long size, unsigned long flags)
> > > +{
> > > +     unsigned int shift = (size >= PMD_SIZE) ? PMD_SHIFT :
> > > +                             arch_vmap_pte_supported_shift(size);
> > > +     struct vm_struct *vm_area = NULL;
> > > +
> > > +     /*
> > > +      * Try to allocate an aligned vm_area so contiguous pages can be
> > > +      * mapped in batches.
> > > +      */
> > > +     while (1) {
> > > +             unsigned long align = 1UL << shift;
> > > +
> > > +             vm_area = __get_vm_area_node(size, align, PAGE_SHIFT, flags,
> > > +                             VMALLOC_START, VMALLOC_END,
> > > +                             NUMA_NO_NODE, GFP_KERNEL,
> > > +                             __builtin_return_address(0));
> > > +             if (vm_area || shift <= PAGE_SHIFT)
> > > +                     goto out;
> > > +             if (shift == PMD_SHIFT)
> > > +                     shift = arch_vmap_pte_supported_shift(size);
> > > +             else if (shift > PAGE_SHIFT)
> > > +                     shift = PAGE_SHIFT;
> > > +     }
> > > +
> > > +out:
> > > +     return vm_area;
> > > +}
> > > +
> > IMO, we should get rid of this while(1) loop. It looks like you need to
> > handle just few cases. 3?
> 
Hello, Barry!

> 
> I don’t quite understand what you mean — are you suggesting
> calling __get_vm_area_node() three times? We try 2MB first,
> then 64KB, and finally 4KB. If 2MB succeeds, there is no
> reason to try 64KB. Likewise, if 64KB succeeds, there is no
> need to fall back to 4KB.
> 
I mean either to make three cases kind of open-coded:

...
if (size >= PMD_SIZE)
  alloc_vm_area_with_shift(PMD_SHIFT);
  if (vm_area)
    return vm_area;

shift = get_supported_shift(size)
if (shift > PAGE_SHIFT)
  alloc_vm_area_with_shift(shift);
    if (vm_area)
      return vm_area;

return alloc_vm_area_with_shift(PAGE_SHIFT)
...

or put everything into a for (i = 0; i < 3; i++) loop - that way it will
finish in any case, and it is obvious to the reader that we handle at most
three scenarios.

> >
> >
> > shift min value is PAGE_SHIFT, could you please clarify when it can be less?
> 
> I guess this should be changed to "==" ?
> 
I assume shift can not be less than PAGE_SHIFT :)

--
Uladzislau Rezki


^ permalink raw reply

* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Barry Song @ 2026-05-20  9:07 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: Suren Baghdasaryan, Matthew Wilcox, akpm, linux-mm, david, liam,
	vbabka, rppt, mhocko, jack, pfalcato, wanglian, chentao,
	lianux.mm, kunwu.chan, liyangouwen1, chrisl, kasong, shikemeng,
	nphamcs, bhe, youngjun.park, linux-arm-kernel, linux-kernel,
	loongarch, linuxppc-dev, linux-riscv, linux-s390, Nanzhe Zhao
In-Reply-To: <ag1mk2DSJsiORaxK@lucifer>

On Wed, May 20, 2026 at 3:50 PM Lorenzo Stoakes <ljs@kernel.org> wrote:
>
> On Wed, May 20, 2026 at 05:18:52AM +0800, Barry Song wrote:
> > On Tue, May 19, 2026 at 8:53 PM Lorenzo Stoakes <ljs@kernel.org> wrote:
> > >
> > > On Mon, May 18, 2026 at 12:56:59PM -0700, Suren Baghdasaryan wrote:
> > >
> > > > >
> > > > > I think we either need to fix `fork()`, or keep the current
> > > > > behavior of dropping the VMA lock before performing I/O.
> > > >
> > > > I see. So, this problem arises from the fact that we are changing the
> > > > pagefaults requiring I/O operation to hold VMA lock...
> > > > And you want to lock VMA on fork only if vma_is_anonymous(vma) ||
> > > > is_cow_mapping(vma->vm_flags). So, we will be blocking page faults for
> > > > anonymous and COW VMAs only while holding mmap_write_lock, preventing
> > > > any VMA modification. On the surface, that looks ok to me but I might
> > > > be missing some corner cases. If nobody sees any obvious issues, I
> > > > think it's worth a try.
> > >
> > > Not sure if you noticed but I did raise concerns ;)
> > >
> > > I wonder if you've confused the fault path and fork here, as I think Barry has
> > > been a little unclear on that.
> >
> > I think I’ve been absolutely clear :-)
>
> On this point sure, I would argue less so around the fork stuff but I responded
> on that specifically elsewhere so let's keep things moving :>)
>
> > We should either stick to the current behavior - drop
> > the VMA lock before doing I/O, or change fork() so that it
> > does not wait on vma_start_write().
>
> Again, as I said elsewhere, I think there might be a 3rd way possibly. It's a
> big mistake to assume that there are only specific solutions to problems in the
> kernel then to present a false dichotomy.

I recalled that when we discussed this part in my slides:

‘For simplicity, rather than using a whitelist mechanism for
per-VMA retry, we could use a blacklist instead: default to
always retry via the VMA lock, and only allow mmap_lock-based
page-fault retry for specific cases such as
__vmf_anon_prepare().’

Suren mentioned introducing a FALLBACK flag. With the
FALLBACK flag, we would retry via mmap_lock; with the RETRY
flag, we would retry via the VMA lock.

Not sure whether this could really be called a ‘third way,’
but it seems more like a shift from a whitelist model to a
blacklist model, without changing the fundamental design, but
it does change where we would need to touch the source code.

>
> We absolutely hear you on this being a problem and it WILL be addressed one way
> or another.

Thanks. This is a bit of light in what has felt like a fairly
dark situation. I really appreciate your thoughtful and
responsible approach.

>
> Of the two approaches, as I said elsewhere, I prefer what you've done in this
> series to anything touching fork.
>
> But give me time to look through the series please (I'd also suggest RFC'ing
> when it's something kinda fundamental that might generate conversation, makes
> life a bit easier on the review side :)

Thanks! Sure, I’m happy to wait and there’s no urgency.

Last year you made quite a significant contribution to the work
when I tried to remove mmap_lock in madvise. I really
appreciated it. Now we’re back to the same lock again, just in
different places.

Best Regards
Barry


^ permalink raw reply

* [PATCH v2] i2c: imx-lpi2c: mark I2C adapter when hardware is powered down
From: Carlos Song (OSS) @ 2026-05-20  9:09 UTC (permalink / raw)
  To: aisheng.dong, andi.shyti, Frank.Li, s.hauer, kernel, festevam,
	carlos.song
  Cc: linux-i2c, imx, linux-arm-kernel, linux-kernel, stable

From: Carlos Song <carlos.song@nxp.com>

Mark the I2C adapter as suspended during system suspend to block further
transfers, and resume it on system resume. This prevents potential hangs
when the hardware is powered down but clients still attempt I2C transfers.

Fixes: 1ee867e465c1 ("i2c: imx-lpi2c: add target mode support")
Cc: stable@vger.kernel.org
Signed-off-by: Carlos Song <carlos.song@nxp.com>
---
Change for v2:
  - Call i2c_mark_adapter_suspended() before pm_runtime_force_suspend()
    to prevent potential deadlock if a transfer is active during suspend.
  - Roll back with i2c_mark_adapter_resumed() if pm_runtime_force_suspend()
    fails.
---
 drivers/i2c/busses/i2c-imx-lpi2c.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index a01c23696481..01ee38131ef2 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -1635,7 +1635,18 @@ static int __maybe_unused lpi2c_runtime_resume(struct device *dev)
 
 static int __maybe_unused lpi2c_suspend_noirq(struct device *dev)
 {
-	return pm_runtime_force_suspend(dev);
+	struct lpi2c_imx_struct *lpi2c_imx = dev_get_drvdata(dev);
+	int ret;
+
+	i2c_mark_adapter_suspended(&lpi2c_imx->adapter);
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret) {
+		i2c_mark_adapter_resumed(&lpi2c_imx->adapter);
+		return ret;
+	}
+
+	return 0;
 }
 
 static int __maybe_unused lpi2c_resume_noirq(struct device *dev)
@@ -1655,6 +1666,8 @@ static int __maybe_unused lpi2c_resume_noirq(struct device *dev)
 	if (lpi2c_imx->target)
 		lpi2c_imx_target_init(lpi2c_imx);
 
+	i2c_mark_adapter_resumed(&lpi2c_imx->adapter);
+
 	return 0;
 }
 
-- 
2.43.0



^ permalink raw reply related

* Re: [PATCH v6 06/12] firmware: arm_scmi: Extend powercap report to include MAI
From: Philip Radford @ 2026-05-20  8:58 UTC (permalink / raw)
  To: linux-kernel, linux-arm-kernel, arm-scmi, linux-pm
  Cc: sudeep.holla, james.quinlan, f.fainelli, vincent.guittot,
	etienne.carriere, peng.fan, michal.simek, quic_sibis,
	dan.carpenter, d-gole, souvik.chakravarty
In-Reply-To: <20260518135234.2953532-7-philip.radford@arm.com>

On Mon, May 18, 2026 at 02:52:28PM +0100, Philip Radford wrote:
> Extend scmi_powercap_meas_changed_report to include MAI change
> notifications.
> 
> Signed-off-by: Philip Radford <philip.radford@arm.com>
> ---
> V5->V6
> - Re-worded existing comment for POWERCAP_MEASUREMENTS_NOTIFY
> - Added define for V2/V3 sizes
> - Used new definitions
> ---
>  drivers/firmware/arm_scmi/powercap.c | 28 +++++++++++++++-------------
>  include/linux/scmi_protocol.h        |  1 +
>  2 files changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
> index 1d1188e98d49..fdfaf03277ab 100644
> --- a/drivers/firmware/arm_scmi/powercap.c
> +++ b/drivers/firmware/arm_scmi/powercap.c
> @@ -11,6 +11,7 @@
>  #include <linux/io.h>
>  #include <linux/module.h>
>  #include <linux/scmi_protocol.h>
> +#include <linux/stddef.h>
>  
>  #include <trace/events/scmi.h>
>  
> @@ -21,6 +22,8 @@
>  #define SCMI_PROTOCOL_SUPPORTED_VERSION		0x30000
>  
>  #define CPL0	0
> +#define SZ_V2 (sizeof(struct scmi_powercap_meas_changed_notify_payld))
> +#define SZ_V3 (SZ_V2 - sizeof(__le32))
>  

Apologies all,

After posting I have realised that I mislabelled these macros where SZ_V2
should be the smaller of the two.

This will be fixed in a new version along with any further comments.

Sorry again,
Phil


^ permalink raw reply

* Re: [PATCH v3 0/5] Support the FEAT_HDBSS introduced in Armv9.5
From: Tian Zheng @ 2026-05-20  8:51 UTC (permalink / raw)
  To: Will Deacon, maz, oupton, catalin.marinas, corbet, pbonzini,
	Tian Zheng
  Cc: kernel-team, yuzenghui, wangzhou1, liuyonglong, yezhenyu2,
	joey.gouly, kvmarm, kvm, linux-arm-kernel, linux-doc,
	linux-kernel, skhan, suzuki.poulose, leo.bras, Jonathan Cameron
In-Reply-To: <177918656142.736362.17906576792384645789.b4-ty@kernel.org>



On 5/19/2026 11:23 PM, Will Deacon wrote:
> On Wed, 25 Feb 2026 12:04:16 +0800, Tian Zheng wrote:
>> This series of patches add support to the Hardware Dirty state tracking
>> Structure(HDBSS) feature, which is introduced by the ARM architecture
>> in the DDI0601(ID121123) version.
>>
>> The HDBSS feature is an extension to the architecture that enhances
>> tracking translation table descriptors' dirty state, identified as
>> FEAT_HDBSS. This feature utilizes hardware assistance to achieve dirty
>> page tracking, aiming to significantly reduce the overhead of scanning
>> for dirty pages.
>>
>> [...]
> 
> Applied sysreg definitions to arm64 (for-next/sysregs), thanks!
> 
> [1/5] arm64/sysreg: Add HDBSS related register information
>        https://git.kernel.org/arm64/c/72f7be0c2e30
> 
> Cheers,

Thanks!
Tian



^ permalink raw reply

* [PATCH v2 2/2] KVM: arm64: nv: Don't save/restore FP register during a nested ERET or exception
From: Marc Zyngier @ 2026-05-20  8:50 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel, kvm
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Mark Rutland, Will Deacon, Fuad Tabba
In-Reply-To: <20260520085036.541666-1-maz@kernel.org>

When switching between L1 and L2, we save the old state using
kvm_arch_vcpu_put(), mutate the state in memory, then load the new
state using kvm_arch_vcpu_load(). Any live FPSIMD/SVE state is saved
and unbound, such that it can be lazily restored on a subsequent trap.

The FPSIMD/SVE state is shared by exception levels, and only a handful
of related control registers need to be changed when transitioning
between L1 and L2. The save/restore of the common state is needless
overhead, especially as trapping becomes exponentially more expensive
with nesting.

Avoid this overhead by leaving the common FPSIMD/SVE state live on the
CPU, and only switching the state that is distinct for L1 and L2:

- the trap controls: the effective values are recomputed on each entry
  into the guest to take the EL into account and merge the L0 and L1
  configuration if in a nested context, or directly use the L0 configuration
  in non-nested context (see __activate_traps()).

- the VL settings: the effective values are also recomputed on each
  entry into the guest (see fpsimd_lazy_switch_to_guest()).

Since we appear to cover all bases, use the vcpu flags indicating the
handling of a nested ERET or exception delivery to avoid the whole FP
save/restore shenanigans. SME will have to be similarly dealt with when
it eventually gets supported.

For an EL1 L3 guest where L1 and L2 have this optimisation, this
results in at least a 10% wall clock reduction when running an I/O
heavy workload, generating a high rate of nested exceptions.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/fpsimd.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 15e17aca1dec0..aca98752a6e42 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -28,6 +28,20 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	if (!system_supports_fpsimd())
 		return;

+	/*
+	 * Avoid needless save/restore of the guest's common
+	 * FPSIMD/SVE/SME regs during transitions between L1/L2.
+	 *
+	 * These transitions only happens in a non-preemptible context
+	 * where the host regs have already been saved and unbound. The
+	 * live registers are either free or owned by the guest.
+	 */
+	if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+	    vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+		WARN_ON_ONCE(host_owns_fp_regs());
+		return;
+	}
+
 	/*
 	 * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
 	 * that the host kernel is responsible for restoring this state upon
@@ -102,6 +116,15 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;

+	/*
+	 * See comment in kvm_arch_vcpu_load_fp().
+	 */
+	if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+	    vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+		WARN_ON_ONCE(host_owns_fp_regs());
+		return;
+	}
+
 	local_irq_save(flags);

 	if (guest_owns_fp_regs()) {
-- 
2.47.3

^ permalink raw reply related

* [PATCH v2 1/2] KVM: arm64: nv: Track L2 to L1 exception emulation
From: Marc Zyngier @ 2026-05-20  8:50 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel, kvm
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Mark Rutland, Will Deacon, Fuad Tabba
In-Reply-To: <20260520085036.541666-1-maz@kernel.org>

While we currently track that we are emulating a nested ERET from
L1 to L2, we currently don't track the reverse direction (an exception
going from L2 to L1).

Add a new vcpu state flag for this purpose, which will see some
use shortly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h | 3 ++-
 arch/arm64/kvm/emulate-nested.c   | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 65eead8362e0b..c79747d5f4dd1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1112,7 +1112,8 @@ struct kvm_vcpu_arch {
 #define IN_NESTED_ERET		__vcpu_single_flag(sflags, BIT(7))
 /* SError pending for nested guest */
 #define NESTED_SERROR_PENDING	__vcpu_single_flag(sflags, BIT(8))
-
+/* KVM is currently emulating an L2 to L1 exception */
+#define IN_NESTED_EXCEPTION	__vcpu_single_flag(sflags, BIT(9))
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index dba7ced74ca5e..15c691a6266d5 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2862,6 +2862,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
 
 	preempt_disable();
 
+	vcpu_set_flag(vcpu, IN_NESTED_EXCEPTION);
+
 	/*
 	 * We may have an exception or PC update in the EL0/EL1 context.
 	 * Commit it before entering EL2.
@@ -2884,6 +2886,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
 	__kvm_adjust_pc(vcpu);
 
 	kvm_arch_vcpu_load(vcpu, smp_processor_id());
+	vcpu_clear_flag(vcpu, IN_NESTED_EXCEPTION);
+
 	preempt_enable();
 
 	if (kvm_vcpu_has_pmu(vcpu))
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 0/2] KVM: arm64: nv: Reduce FP/SVE overhead on exception/exception return
From: Marc Zyngier @ 2026-05-20  8:50 UTC (permalink / raw)
  To: kvmarm, linux-arm-kernel, kvm
  Cc: Steffen Eiden, Joey Gouly, Suzuki K Poulose, Oliver Upton,
	Zenghui Yu, Mark Rutland, Will Deacon, Fuad Tabba

This is the second version of this short series optimising away a lot
of unnecessary FPSIMD/SVE context switching with NV.

* From v1 [1]:

  - New commit message on patch #2 (Mark)

  - Additional comments and WARN_ON_ONCE() (Mark)

If nobody screams, I'll stick that into -next.

Thanks,

	M.

[1] https://lore.kernel.org/r/20260512140755.3676306-1-maz@kernel.org

Marc Zyngier (2):
  KVM: arm64: nv: Track L2 to L1 exception emulation
  KVM: arm64: nv: Don't save/restore FP register during a nested ERET or
    exception

 arch/arm64/include/asm/kvm_host.h |  3 ++-
 arch/arm64/kvm/emulate-nested.c   |  4 ++++
 arch/arm64/kvm/fpsimd.c           | 23 +++++++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

-- 
2.47.3



^ permalink raw reply

* [PATCH V3 8/8] arm64: dts: imx95: Move power supply properties to Root Port node
From: Sherry Sun (OSS) @ 2026-05-20  8:49 UTC (permalink / raw)
  To: robh, krzk+dt, conor+dt, Frank.Li, s.hauer, kernel, festevam,
	lpieralisi, kwilczynski, mani, bhelgaas, hongxing.zhu, l.stach
  Cc: imx, linux-pci, linux-arm-kernel, devicetree, linux-kernel,
	sherry.sun
In-Reply-To: <20260520084904.2424253-1-sherry.sun@oss.nxp.com>

From: Sherry Sun <sherry.sun@nxp.com>

Move the vpcie-supply and vpcie3v3aux-supply properties from the PCIe
controller nodes to the Root Port child nodes to support the new PCI
pwrctrl framework.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
---
 arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts | 4 ++--
 arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
index 7d820a0f80b2..0d1cdfd54cce 100644
--- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
@@ -555,8 +555,6 @@ &pcie0 {
 	pinctrl-names = "default";
 	/* This property is deprecated, use reset-gpios from the Root Port node. */
 	reset-gpio = <&gpio5 13 GPIO_ACTIVE_LOW>;
-	vpcie-supply = <&reg_m2_pwr>;
-	vpcie3v3aux-supply = <&reg_m2_pwr>;
 	supports-clkreq;
 	status = "disabled";
 };
@@ -570,6 +568,8 @@ &pcie0_ep {
 
 &pcie0_port0 {
 	reset-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+	vpcie-supply = <&reg_m2_pwr>;
+	vpcie3v3aux-supply = <&reg_m2_pwr>;
 };
 
 &sai1 {
diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
index 2e463bc7c601..c08731dfb1ee 100644
--- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
@@ -542,8 +542,6 @@ &pcie0 {
 	pinctrl-names = "default";
 	/* This property is deprecated, use reset-gpios from the Root Port node. */
 	reset-gpio = <&i2c7_pcal6524 5 GPIO_ACTIVE_LOW>;
-	vpcie-supply = <&reg_pcie0>;
-	vpcie3v3aux-supply = <&reg_pcie0>;
 	supports-clkreq;
 	status = "okay";
 };
@@ -557,6 +555,8 @@ &pcie0_ep {
 
 &pcie0_port0 {
 	reset-gpios = <&i2c7_pcal6524 5 GPIO_ACTIVE_LOW>;
+	vpcie-supply = <&reg_pcie0>;
+	vpcie3v3aux-supply = <&reg_pcie0>;
 };
 
 &pcie1 {
@@ -564,8 +564,6 @@ &pcie1 {
 	pinctrl-names = "default";
 	/* This property is deprecated, use reset-gpios from the Root Port node. */
 	reset-gpio = <&i2c7_pcal6524 16 GPIO_ACTIVE_LOW>;
-	vpcie-supply = <&reg_slot_pwr>;
-	vpcie3v3aux-supply = <&reg_slot_pwr>;
 	status = "okay";
 };
 
@@ -578,6 +576,8 @@ &pcie1_ep {
 
 &pcie1_port0 {
 	reset-gpios = <&i2c7_pcal6524 16 GPIO_ACTIVE_LOW>;
+	vpcie-supply = <&reg_slot_pwr>;
+	vpcie3v3aux-supply = <&reg_slot_pwr>;
 };
 
 &sai1 {
-- 
2.37.1



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox