Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v4 1/3] KVM: arm64: Disable TRBE Trace Buffer Unit when running in guest context
From: Will Deacon @ 2026-03-27 13:00 UTC (permalink / raw)
  To: kvmarm
  Cc: mark.rutland, linux-arm-kernel, Will Deacon, Marc Zyngier,
	Oliver Upton, James Clark, Leo Yan, Suzuki K Poulose, Fuad Tabba,
	Alexandru Elisei, Yabin Cui
In-Reply-To: <20260327130047.21065-1-will@kernel.org>

The nVHE world-switch code relies on zeroing TRFCR_EL1 to disable trace
generation in guest context when self-hosted TRBE is in use by the host.

Per D3.2.1 ("Controls to prohibit trace at Exception levels"), clearing
TRFCR_EL1 means that trace generation is prohibited at EL1 and EL0 but
per R_YCHKJ the Trace Buffer Unit will still be enabled if
TRBLIMITR_EL1.E is set. R_SJFRQ goes on to state that, when enabled, the
Trace Buffer Unit can perform address translation for the "owning
exception level" even when it is out of context.

Consequently, we can end up in a state where TRBE performs speculative
page-table walks for a host VA/IPA in guest/hypervisor context depending
on the value of MDCR_EL2.E2TB, which changes over world-switch. The
potential result appears to be a heady mixture of SErrors, data
corruption and hardware lockups.

Extend the TRBE world-switch code to clear TRBLIMITR_EL1.E after
draining the buffer, restoring the register on return to the host. This
unfortunately means we need to tackle CPU errata #2064142 and #2038923
which add additional synchronisation requirements around manipulations
of the limit register. Hopefully this doesn't need to be fast.

Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: James Clark <james.clark@linaro.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Leo Yan <leo.yan@arm.com>
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Fixes: a1319260bf62 ("arm64: KVM: Enable access to TRBE support for host")
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h  |  1 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 71 ++++++++++++++++++++++++++----
 arch/arm64/kvm/hyp/nvhe/switch.c   |  2 +-
 3 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 70cb9cfd760a..b1335c55dbef 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -770,6 +770,7 @@ struct kvm_host_data {
 		u64 pmscr_el1;
 		/* Self-hosted trace */
 		u64 trfcr_el1;
+		u64 trblimitr_el1;
 		/* Values of trap registers for the host before guest entry. */
 		u64 mdcr_el2;
 		u64 brbcr_el1;
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 2a1c0f49792b..0955af771ad1 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -57,12 +57,54 @@ static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
 	write_sysreg_el1(new_trfcr, SYS_TRFCR);
 }
 
-static bool __trace_needs_drain(void)
+static void __trace_drain_and_disable(void)
 {
-	if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
-		return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
+	u64 *trblimitr_el1 = host_data_ptr(host_debug_state.trblimitr_el1);
+	bool needs_drain = is_protected_kvm_enabled() ?
+			   host_data_test_flag(HAS_TRBE) :
+			   host_data_test_flag(TRBE_ENABLED);
 
-	return host_data_test_flag(TRBE_ENABLED);
+	if (!needs_drain) {
+		*trblimitr_el1 = 0;
+		return;
+	}
+
+	*trblimitr_el1 = read_sysreg_s(SYS_TRBLIMITR_EL1);
+	if (*trblimitr_el1 & TRBLIMITR_EL1_E) {
+		/*
+		 * The host has enabled the Trace Buffer Unit so we have
+		 * to beat the CPU with a stick until it stops accessing
+		 * memory.
+		 */
+
+		/* First, ensure that our prior write to TRFCR has stuck. */
+		isb();
+
+		/* Now synchronise with the trace and drain the buffer. */
+		tsb_csync();
+		dsb(nsh);
+
+		/*
+		 * With no more trace being generated, we can disable the
+		 * Trace Buffer Unit.
+		 */
+		write_sysreg_s(0, SYS_TRBLIMITR_EL1);
+		if (cpus_have_final_cap(ARM64_WORKAROUND_2064142)) {
+			/*
+			 * Some CPUs are so good, we have to drain 'em
+			 * twice.
+			 */
+			tsb_csync();
+			dsb(nsh);
+		}
+
+		/*
+		 * Ensure that the Trace Buffer Unit is disabled before
+		 * we start mucking with the stage-2 and trap
+		 * configuration.
+		 */
+		isb();
+	}
 }
 
 static bool __trace_needs_switch(void)
@@ -79,15 +121,26 @@ static void __trace_switch_to_guest(void)
 
 	__trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
 			  *host_data_ptr(trfcr_while_in_guest));
-
-	if (__trace_needs_drain()) {
-		isb();
-		tsb_csync();
-	}
+	__trace_drain_and_disable();
 }
 
 static void __trace_switch_to_host(void)
 {
+	u64 trblimitr_el1 = *host_data_ptr(host_debug_state.trblimitr_el1);
+
+	if (trblimitr_el1 & TRBLIMITR_EL1_E) {
+		/* Re-enable the Trace Buffer Unit for the host. */
+		write_sysreg_s(trblimitr_el1, SYS_TRBLIMITR_EL1);
+		isb();
+		if (cpus_have_final_cap(ARM64_WORKAROUND_2038923)) {
+			/*
+			 * Make sure the unit is re-enabled before we
+			 * poke TRFCR.
+			 */
+			isb();
+		}
+	}
+
 	__trace_do_switch(host_data_ptr(trfcr_while_in_guest),
 			  *host_data_ptr(host_debug_state.trfcr_el1));
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 779089e42681..f00688e69d88 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -278,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * We're about to restore some new MMU state. Make sure
 	 * ongoing page-table walks that have started before we
 	 * trapped to EL2 have completed. This also synchronises the
-	 * above disabling of BRBE, SPE and TRBE.
+	 * above disabling of BRBE and SPE.
 	 *
 	 * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
 	 * rule R_LFHQG and subsequent information statements.
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 3/3] KVM: arm64: Don't pass host_debug_state to BRBE world-switch routines
From: Will Deacon @ 2026-03-27 13:00 UTC (permalink / raw)
  To: kvmarm
  Cc: mark.rutland, linux-arm-kernel, Will Deacon, Marc Zyngier,
	Oliver Upton, James Clark, Leo Yan, Suzuki K Poulose, Fuad Tabba,
	Alexandru Elisei, Yabin Cui
In-Reply-To: <20260327130047.21065-1-will@kernel.org>

Now that the SPE and BRBE nVHE world-switch routines operate on the
host_debug_state directly, tweak the BRBE code to do the same for
consistency.

This is purely cosmetic.

Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: James Clark <james.clark@linaro.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 84bc80f4e36b..f8904391c125 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -156,8 +156,10 @@ static void __trace_switch_to_host(void)
 			  *host_data_ptr(host_debug_state.trfcr_el1));
 }
 
-static void __debug_save_brbe(u64 *brbcr_el1)
+static void __debug_save_brbe(void)
 {
+	u64 *brbcr_el1 = host_data_ptr(host_debug_state.brbcr_el1);
+
 	*brbcr_el1 = 0;
 
 	/* Check if the BRBE is enabled */
@@ -173,8 +175,10 @@ static void __debug_save_brbe(u64 *brbcr_el1)
 	write_sysreg_el1(0, SYS_BRBCR);
 }
 
-static void __debug_restore_brbe(u64 brbcr_el1)
+static void __debug_restore_brbe(void)
 {
+	u64 brbcr_el1 = *host_data_ptr(host_debug_state.brbcr_el1);
+
 	if (!brbcr_el1)
 		return;
 
@@ -190,7 +194,7 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 
 	/* Disable BRBE branch records */
 	if (host_data_test_flag(HAS_BRBE))
-		__debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1));
+		__debug_save_brbe();
 
 	if (__trace_needs_switch())
 		__trace_switch_to_guest();
@@ -206,7 +210,7 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 	if (host_data_test_flag(HAS_SPE))
 		__debug_restore_spe();
 	if (host_data_test_flag(HAS_BRBE))
-		__debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
+		__debug_restore_brbe();
 	if (__trace_needs_switch())
 		__trace_switch_to_host();
 }
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 2/3] KVM: arm64: Disable SPE Profiling Buffer when running in guest context
From: Will Deacon @ 2026-03-27 13:00 UTC (permalink / raw)
  To: kvmarm
  Cc: mark.rutland, linux-arm-kernel, Will Deacon, Marc Zyngier,
	Oliver Upton, James Clark, Leo Yan, Suzuki K Poulose, Fuad Tabba,
	Alexandru Elisei, Yabin Cui
In-Reply-To: <20260327130047.21065-1-will@kernel.org>

The nVHE world-switch code relies on zeroing PMSCR_EL1 to disable
profiling data generation in guest context when SPE is in use by the
host.

Unfortunately, this may leave PMBLIMITR_EL1.E set and consequently we
can end up running in guest/hypervisor context with the Profiling Buffer
enabled. The current "known issues" document for Rev M.a of the Arm ARM
states that this can lead to speculative, out-of-context translations:

  | 2.18 D23136:
  |
  | When the Profiling Buffer is enabled, profiling is not stopped, and
  | Discard mode is not enabled, the Statistical Profiling Unit might
  | cause speculative translations for the owning translation regime,
  | including when the owning translation regime is out-of-context.

In a similar fashion to TRBE, ensure that the Profiling Buffer is
disabled during the nVHE world switch before we start messing with the
stage-2 MMU and trap configuration.

Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: James Clark <james.clark@linaro.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Fuad Tabba <tabba@google.com>
Fixes: f85279b4bd48 ("arm64: KVM: Save/restore the host SPE state when entering/leaving a VM")
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h  |  1 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 33 ++++++++++++++++++++----------
 arch/arm64/kvm/hyp/nvhe/switch.c   |  2 +-
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b1335c55dbef..fe588760fe62 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -768,6 +768,7 @@ struct kvm_host_data {
 		struct kvm_guest_debug_arch regs;
 		/* Statistical profiling extension */
 		u64 pmscr_el1;
+		u64 pmblimitr_el1;
 		/* Self-hosted trace */
 		u64 trfcr_el1;
 		u64 trblimitr_el1;
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 0955af771ad1..84bc80f4e36b 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -14,20 +14,20 @@
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 
-static void __debug_save_spe(u64 *pmscr_el1)
+static void __debug_save_spe(void)
 {
-	u64 reg;
+	u64 *pmscr_el1, *pmblimitr_el1;
 
-	/* Clear pmscr in case of early return */
-	*pmscr_el1 = 0;
+	pmscr_el1 = host_data_ptr(host_debug_state.pmscr_el1);
+	pmblimitr_el1 = host_data_ptr(host_debug_state.pmblimitr_el1);
 
 	/*
 	 * At this point, we know that this CPU implements
 	 * SPE and is available to the host.
 	 * Check if the host is actually using it ?
 	 */
-	reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
-	if (!(reg & BIT(PMBLIMITR_EL1_E_SHIFT)))
+	*pmblimitr_el1 = read_sysreg_s(SYS_PMBLIMITR_EL1);
+	if (!(*pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
 		return;
 
 	/* Yes; save the control register and disable data generation */
@@ -37,18 +37,29 @@ static void __debug_save_spe(u64 *pmscr_el1)
 
 	/* Now drain all buffered data to memory */
 	psb_csync();
+	dsb(nsh);
+
+	/* And disable the profiling buffer */
+	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
+	isb();
 }
 
-static void __debug_restore_spe(u64 pmscr_el1)
+static void __debug_restore_spe(void)
 {
-	if (!pmscr_el1)
+	u64 pmblimitr_el1 = *host_data_ptr(host_debug_state.pmblimitr_el1);
+
+	if (!(pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
 		return;
 
 	/* The host page table is installed, but not yet synchronised */
 	isb();
 
+	/* Re-enable the profiling buffer. */
+	write_sysreg_s(pmblimitr_el1, SYS_PMBLIMITR_EL1);
+	isb();
+
 	/* Re-enable data generation */
-	write_sysreg_el1(pmscr_el1, SYS_PMSCR);
+	write_sysreg_el1(*host_data_ptr(host_debug_state.pmscr_el1), SYS_PMSCR);
 }
 
 static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
@@ -175,7 +186,7 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
 	/* Disable and flush SPE data generation */
 	if (host_data_test_flag(HAS_SPE))
-		__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
+		__debug_save_spe();
 
 	/* Disable BRBE branch records */
 	if (host_data_test_flag(HAS_BRBE))
@@ -193,7 +204,7 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
 	if (host_data_test_flag(HAS_SPE))
-		__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
+		__debug_restore_spe();
 	if (host_data_test_flag(HAS_BRBE))
 		__debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
 	if (__trace_needs_switch())
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f00688e69d88..9b6e87dac3b9 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -278,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * We're about to restore some new MMU state. Make sure
 	 * ongoing page-table walks that have started before we
 	 * trapped to EL2 have completed. This also synchronises the
-	 * above disabling of BRBE and SPE.
+	 * above disabling of BRBE.
 	 *
 	 * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
 	 * rule R_LFHQG and subsequent information statements.
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* Re: (subset) [PATCH v17 0/8] support FEAT_LSUI
From: Catalin Marinas @ 2026-03-27 13:16 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, kvmarm, kvm, linux-kselftest,
	Yeoreum Yun
  Cc: will, maz, oupton, miko.lenczewski, kevin.brodsky, broonie, ardb,
	suzuki.poulose, lpieralisi, joey.gouly, yuzenghui
In-Reply-To: <20260314175133.1084528-1-yeoreum.yun@arm.com>

On Sat, 14 Mar 2026 17:51:25 +0000, Yeoreum Yun wrote:
> Since Armv9.6, FEAT_LSUI supplies the load/store instructions for
> privileged level to access user memory without clearing the
> PSTATE.PAN bit.
> 
> This patchset supports FEAT_LSUI and applies it mainly in
> futex atomic operations and others.
> 
> [...]

Applied to arm64 (for-next/feat_lsui), thanks!

I decided to drop patch [6/8] (arm64: armv8_deprecated: disable swp
emulation when FEAT_LSUI present). The way FEAT_LSUI support looks now,
we still have uaccess_enable_privileged() working properly and we could
even support SWP emulation using exclusives. While it's highly unlikely
to see both 32-bit EL0 and FEAT_LSUI in practice, models may support the
combination and disabling SWP emulation feels pretty artificial.

[1/8] arm64: cpufeature: add FEAT_LSUI
      https://git.kernel.org/arm64/c/7181f718cb0f
[2/8] KVM: arm64: expose FEAT_LSUI to guest
      https://git.kernel.org/arm64/c/f6bff18d05ed
[3/8] KVM: arm64: kselftest: set_id_regs: add test for FEAT_LSUI
      https://git.kernel.org/arm64/c/42550d7d8aa6
[4/8] arm64: futex: refactor futex atomic operation
      https://git.kernel.org/arm64/c/eaa3babcceaa
[5/8] arm64: futex: support futex with FEAT_LSUI
      https://git.kernel.org/arm64/c/44adf2bf40ef
[7/8] KVM: arm64: use CAST instruction for swapping guest descriptor
      https://git.kernel.org/arm64/c/16dbe77a5be2
[8/8] arm64: Kconfig: add support for LSUI
      https://git.kernel.org/arm64/c/377609ae8b6a

-- 
Catalin


^ permalink raw reply

* Re: [PATCH v4 3/3] KVM: arm64: Don't pass host_debug_state to BRBE world-switch routines
From: Fuad Tabba @ 2026-03-27 13:23 UTC (permalink / raw)
  To: Will Deacon
  Cc: kvmarm, mark.rutland, linux-arm-kernel, Marc Zyngier,
	Oliver Upton, James Clark, Leo Yan, Suzuki K Poulose,
	Alexandru Elisei, Yabin Cui
In-Reply-To: <20260327130047.21065-4-will@kernel.org>

On Fri, 27 Mar 2026 at 13:01, Will Deacon <will@kernel.org> wrote:
>
> Now that the SPE and BRBE nVHE world-switch routines operate on the
> host_debug_state directly, tweak the BRBE code to do the same for
> consistency.
>
> This is purely cosmetic.

It is indeed... until sashiko proves me wrong again :)

Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad
>
> Cc: Marc Zyngier <maz@kernel.org>
> Cc: Oliver Upton <oupton@kernel.org>
> Cc: James Clark <james.clark@linaro.org>
> Cc: Leo Yan <leo.yan@arm.com>
> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> Cc: Fuad Tabba <tabba@google.com>
> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: Will Deacon <will@kernel.org>
> ---
>  arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> index 84bc80f4e36b..f8904391c125 100644
> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> @@ -156,8 +156,10 @@ static void __trace_switch_to_host(void)
>                           *host_data_ptr(host_debug_state.trfcr_el1));
>  }
>
> -static void __debug_save_brbe(u64 *brbcr_el1)
> +static void __debug_save_brbe(void)
>  {
> +       u64 *brbcr_el1 = host_data_ptr(host_debug_state.brbcr_el1);
> +
>         *brbcr_el1 = 0;
>
>         /* Check if the BRBE is enabled */
> @@ -173,8 +175,10 @@ static void __debug_save_brbe(u64 *brbcr_el1)
>         write_sysreg_el1(0, SYS_BRBCR);
>  }
>
> -static void __debug_restore_brbe(u64 brbcr_el1)
> +static void __debug_restore_brbe(void)
>  {
> +       u64 brbcr_el1 = *host_data_ptr(host_debug_state.brbcr_el1);
> +
>         if (!brbcr_el1)
>                 return;
>
> @@ -190,7 +194,7 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>
>         /* Disable BRBE branch records */
>         if (host_data_test_flag(HAS_BRBE))
> -               __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1));
> +               __debug_save_brbe();
>
>         if (__trace_needs_switch())
>                 __trace_switch_to_guest();
> @@ -206,7 +210,7 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>         if (host_data_test_flag(HAS_SPE))
>                 __debug_restore_spe();
>         if (host_data_test_flag(HAS_BRBE))
> -               __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
> +               __debug_restore_brbe();
>         if (__trace_needs_switch())
>                 __trace_switch_to_host();
>  }
> --
> 2.53.0.1018.g2bb0e51243-goog
>


^ permalink raw reply

* Re: [PATCH v2 0/6] change young flag check functions to return bool
From: David Hildenbrand (Arm) @ 2026-03-27 13:28 UTC (permalink / raw)
  To: Baolin Wang, akpm
  Cc: ljs, Liam.Howlett, vbabka, rppt, surenb, mhocko, linux-arm-kernel,
	x86, linux-parisc, linuxppc-dev, linux-riscv, linux-s390, kvm,
	open, linux-kernel
In-Reply-To: <cover.1774075004.git.baolin.wang@linux.alibaba.com>

On 3/21/26 07:42, Baolin Wang wrote:
> This is a cleanup patchset to change all young flag check functions to
> return bool, as discussed with David in the previous thread[1]. Since
> callers only care about whether the young flag was set, returning bool
> makes the intention clearer. No functional changes intended.
> 
> Ran mm selftests on Arm64 and x86 machines, and no issues were found.

Thanks!

For the whole series:

Acked-by: David Hildenbrand (Arm) <david@kernel.org>

-- 
Cheers,

David


^ permalink raw reply

* Re: [PATCH 1/3] pinctrl: mediatek: Add gpio-range record in pinctrl driver
From: Deep Pani @ 2026-03-27 13:33 UTC (permalink / raw)
  To: Fred-WY Chen (陳威宇),
	andriy.shevchenko@intel.com, Lei Xue (薛磊),
	Mandeep S
  Cc: Qingliang Li (黎晴亮), sean.wang@kernel.org,
	Yaoy Wang (王瑶瑶), AngeloGioacchino Del Regno,
	Yong Mao (毛勇), linux-gpio@vger.kernel.org,
	linux-kernel@vger.kernel.org, linus.walleij@linaro.org,
	linux-arm-kernel@lists.infradead.org,
	linux-mediatek@lists.infradead.org, matthias.bgg@gmail.com,
	Cathy Xu (许华婷),
	Shunxi Zhang (章顺喜),
	Ye Wang (王叶)
In-Reply-To: <cbb135cbd2c6255537fb55e35c39fc5529e7de78.camel@mediatek.com>

Hi Andy,

You mean gpiochip_add_pin_range(), correct?

IIRC, that adds to gpiochip's range, not the range we are using for our
pinctrl driver. 

The range we are utilizing inside our hardware is of the type struct
pinctrl_gpio_range. There is no callback in gpiochip that handles this
type of range.

I also recall that gpiochip_add_data() doesn't initialize the hw but
rather initializes the gpiochip from the hw data we will provide in
mtk_build_gpiochip(). Thus we need a function which will help
initialize the pinctrl_gpio_range inside our pinctrl driver structure.
This is why we make the mtk_pinctrl_gpio_range_init function here.

For the second question, we are keeping it because before ACPI is
invoked we still need some other pins to be configured, especially if
different pins have different styles of pull configuration. The method
we use is to define those configurations in the pinctrl-mt8901.c file,
which determines the gpio ranges and maps the pinctrl device to ACPI:
one set of gpio ranges per configuration, so different types of pull
configuration have different gpio ranges. This callback helps add
them into the pinctrl subsystem so that other device maintainers can
easily leverage that subsystem to add their resources in their _CRS
calls using the common interfaces.

Thus we need to keep both the functions.

Thanks and Regards,
Deep Pani

On Thu, 2026-03-26 at 12:33 +0000, Fred-WY Chen (陳威宇) wrote:
> On Wed, 2025-11-26 at 19:06 +0100, Andy Shevchenko wrote:
> > 
> > External email : Please do not click links or open attachments
> > until
> > you have verified the sender or the content.
> > 
> > 
> > On Tue, Nov 25, 2025 at 10:36:34AM +0800, Lei Xue wrote:
> > > Kernel GPIO subsystem mapping hardware pin number to a different
> > > range of gpio number. Add gpio-range structure to hold
> > > the mapped gpio range in pinctrl driver. That enables the kernel
> > > to search a range of mapped gpio range against a pinctrl device.
> > 
> > ...
> > 
> > >  static int mtk_build_gpiochip(struct mtk_pinctrl *hw)
> > >  {
> > >       struct gpio_chip *chip = &hw->chip;
> > 
> > >       if (ret < 0)
> > >               return ret;
> > > 
> > > +     mtk_pinctrl_gpio_range_init(hw, chip);
> > > +
> > >       return 0;
> > 
> > We have a callback for that in struct gpio_chip. Any reason not to
> > use it?
> > 
> > >  }
> > 
> > ...
> > 
> > > +     pinctrl_add_gpio_range(hw->pctrl, &hw->range);
> > 
> > Not sure if this is needed.
> > 
> 
> Hi Deep,
> 
> Could you please check this and feedback?
> 
> Regards,
> Fred-WY Chen
> 
> > --
> > With Best Regards,
> > Andy Shevchenko
> > 
> > 
> 

^ permalink raw reply

* Re: [PATCH 3/3] pinctrl: mediatek: mt8901: Add pinctrl driver for MT8901
From: Deep Pani @ 2026-03-27 13:41 UTC (permalink / raw)
  To: linus.walleij@linaro.org, Fred-WY Chen (陳威宇),
	AngeloGioacchino Del Regno, sean.wang@kernel.org,
	matthias.bgg@gmail.com, Mandeep S, Lei Xue (薛磊)
  Cc: Qingliang Li (黎晴亮),
	Ye Wang (王叶), Yaoy Wang (王瑶瑶),
	Yong Mao (毛勇), linux-gpio@vger.kernel.org,
	Shunxi Zhang (章顺喜),
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-mediatek@lists.infradead.org,
	Cathy Xu (许华婷)
In-Reply-To: <0df339f15f4ba7e55880194edfdec1155f2f20f7.camel@mediatek.com>

Hi Angelo,

MT8901 doesn't use devicetree for gpio pin configuration. ACPI ASL
macros are declared in the device's _CRS methods to define exact
configuration for the gpio pins.

We have made sure, and will always make sure, that ACPI is all good on this platform.

Thanks and Regards,
Deep Pani

On Thu, 2026-03-26 at 12:36 +0000, Fred-WY Chen (陳威宇) wrote:
> On Tue, 2025-11-25 at 10:56 +0100, AngeloGioacchino Del Regno wrote:
> > 
> > External email : Please do not click links or open attachments
> > until
> > you have verified the sender or the content.
> > 
> > 
> > Il 25/11/25 03:36, Lei Xue ha scritto:
> > > Add mt8901 pinctrl, gpio and eint driver implementation.
> > > 
> > > Signed-off-by: Lei Xue <lei.xue@mediatek.com>
> > > ---
> > >   drivers/pinctrl/mediatek/Kconfig              |   12 +
> > >   drivers/pinctrl/mediatek/Makefile             |    1 +
> > >   drivers/pinctrl/mediatek/mtk-eint.c           |    4 +
> > >   drivers/pinctrl/mediatek/mtk-eint.h           |    1 +
> > >   drivers/pinctrl/mediatek/pinctrl-mt8901.c     | 1460
> > > +++++++++++
> > >   drivers/pinctrl/mediatek/pinctrl-mtk-mt8901.h | 2130
> > > +++++++++++++++++
> > >   6 files changed, 3608 insertions(+)
> > >   create mode 100644 drivers/pinctrl/mediatek/pinctrl-mt8901.c
> > >   create mode 100644 drivers/pinctrl/mediatek/pinctrl-mtk-
> > > mt8901.h
> > > 
> > > diff --git a/drivers/pinctrl/mediatek/Kconfig
> > > b/drivers/pinctrl/mediatek/Kconfig
> > > index 4819617d9368..4820ae5197a0 100644
> > > --- a/drivers/pinctrl/mediatek/Kconfig
> > > +++ b/drivers/pinctrl/mediatek/Kconfig
> > > @@ -321,6 +321,18 @@ config PINCTRL_MT8516
> > >       default ARM64 && ARCH_MEDIATEK
> > >       select PINCTRL_MTK
> > > 
> > > +config PINCTRL_MT8901
> > > +     bool "MediaTek MT8901 pin control"
> > > +     depends on ACPI
> > > +     depends on ARM64 || COMPILE_TEST
> > > +     default ARM64 && ARCH_MEDIATEK
> > > +     select PINCTRL_MTK_PARIS
> > > +     help
> > > +       Say yes here to support pin controller and gpio driver
> > > +       on MediaTek MT8901 SoC.
> > > +       In MTK platform, we support virtual gpio and use it to
> > > +       map specific eint which doesn't have real gpio pin.
> > > +
> > >   # For PMIC
> > >   config PINCTRL_MT6397
> > >       bool "MediaTek MT6397 pin control"
> > > diff --git a/drivers/pinctrl/mediatek/Makefile
> > > b/drivers/pinctrl/mediatek/Makefile
> > > index ae765bd99965..57c69b1e5c2d 100644
> > > --- a/drivers/pinctrl/mediatek/Makefile
> > > +++ b/drivers/pinctrl/mediatek/Makefile
> > > @@ -43,3 +43,4 @@ obj-$(CONFIG_PINCTRL_MT8196)                +=
> > > pinctrl-mt8196.o
> > >   obj-$(CONFIG_PINCTRL_MT8365)                += pinctrl-mt8365.o
> > >   obj-$(CONFIG_PINCTRL_MT8516)                += pinctrl-mt8516.o
> > >   obj-$(CONFIG_PINCTRL_MT6397)                += pinctrl-mt6397.o
> > > +obj-$(CONFIG_PINCTRL_MT8901)         += pinctrl-mt8901.o
> > > diff --git a/drivers/pinctrl/mediatek/mtk-eint.c
> > > b/drivers/pinctrl/mediatek/mtk-eint.c
> > > index c8c5097c11c4..b5a5beebf9cd 100644
> > > --- a/drivers/pinctrl/mediatek/mtk-eint.c
> > > +++ b/drivers/pinctrl/mediatek/mtk-eint.c
> > > @@ -71,6 +71,10 @@ const unsigned int debounce_time_mt6878[] = {
> > >   };
> > >   EXPORT_SYMBOL_GPL(debounce_time_mt6878);
> > > 
> > > +const unsigned int debounce_time_mt8901[] = {
> > > +     156, 313, 625, 1250, 20000, 40000, 80000, 160000, 320000,
> > > 640000, 0};
> > > +EXPORT_SYMBOL_GPL(debounce_time_mt8901);
> > > +
> > >   static void __iomem *mtk_eint_get_offset(struct mtk_eint *eint,
> > >                                        unsigned int eint_num,
> > >                                        unsigned int offset)
> > > diff --git a/drivers/pinctrl/mediatek/mtk-eint.h
> > > b/drivers/pinctrl/mediatek/mtk-eint.h
> > > index 3cdd6f6310cd..1b185f660aff 100644
> > > --- a/drivers/pinctrl/mediatek/mtk-eint.h
> > > +++ b/drivers/pinctrl/mediatek/mtk-eint.h
> > > @@ -53,6 +53,7 @@ extern const unsigned int
> > > debounce_time_mt2701[];
> > >   extern const unsigned int debounce_time_mt6765[];
> > >   extern const unsigned int debounce_time_mt6795[];
> > >   extern const unsigned int debounce_time_mt6878[];
> > > +extern const unsigned int debounce_time_mt8901[];
> > > 
> > >   struct mtk_eint;
> > > 
> > > diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8901.c
> > > b/drivers/pinctrl/mediatek/pinctrl-mt8901.c
> > > new file mode 100644
> > > index 000000000000..77dec85fe29b
> > > --- /dev/null
> > > +++ b/drivers/pinctrl/mediatek/pinctrl-mt8901.c
> > > @@ -0,0 +1,1460 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * Copyright (C) 2025 MediaTek Inc.
> > > + *
> > > + */
> > > +
> > > +#include <linux/acpi.h>
> > > +#include <linux/module.h>
> > > +#include "pinctrl-mtk-mt8901.h"
> > > +#include "pinctrl-paris.h"
> > > +
> > 
> > ..snip..
> > 
> > > +static const char * const mt8901_pinctrl_register_base_name[] =
> > > {
> > > +     "iocfg0", "iocfg_lt2", "iocfg_lt3", "iocfg_rt1",
> > > "iocfg_rt2",
> > > "iocfg_rt3",
> > > +     "iocfg_tr", "iocfg_rt0", "iocfg_lt1", "iocfg_lb",
> > > "iocfg_rb",
> > > +};
> > > +
> > > +static const struct mtk_eint_hw mt8901_eint_hw = {
> > > +     .port_mask = 0xf,
> > > +     .ports     = 7,
> > > +     .ap_num    = 209,
> > > +     .db_cnt    = 32,
> > > +     .db_time   = debounce_time_mt8901,
> > > +};
> > > +
> > > +static const struct mtk_pin_soc mt8901_data = {
> > > +     .reg_cal = mt8901_reg_cals,
> > > +     .pins = mtk_pins_mt8901,
> > > +     .npins = ARRAY_SIZE(mtk_pins_mt8901),
> > > +     .ngrps = ARRAY_SIZE(mtk_pins_mt8901),
> > > +     .eint_hw = &mt8901_eint_hw,
> > > +     .eint_pin = eint_pins_mt8901,
> > > +     .nfuncs = 8,
> > > +     .gpio_m = 0,
> > > +     .base_names = mt8901_pinctrl_register_base_name,
> > > +     .nbase_names =
> > > ARRAY_SIZE(mt8901_pinctrl_register_base_name),
> > > +     .pull_type = mt8901_pull_type,
> > > +     .pin_rsel = mt8901_pin_rsel_val_range,
> > > +     .npin_rsel = ARRAY_SIZE(mt8901_pin_rsel_val_range),
> > > /*numsel*/
> > > +     .bias_set_combo = mtk_pinconf_bias_set_combo,
> > > +     .bias_get_combo = mtk_pinconf_bias_get_combo,
> > > +     .drive_set = mtk_pinconf_drive_set_rev1,
> > > +     .drive_get = mtk_pinconf_drive_get_rev1,
> > > +     .adv_drive_set = mtk_pinconf_adv_drive_set_raw,
> > > +     .adv_drive_get = mtk_pinconf_adv_drive_get_raw,
> > > +};
> > > +
> > > +static const struct acpi_device_id mt8901_pinctrl_acpi_match[] =
> > > {
> > > +     {"NVDA9221", (kernel_ulong_t)&mt8901_data },
> > > +     { }
> > > +};
> > > +MODULE_DEVICE_TABLE(acpi, mt8901_pinctrl_acpi_match);
> > > +
> > > +static struct platform_driver mt8901_pinctrl_driver = {
> > > +     .driver = {
> > > +             .name = "mt8901-pinctrl",
> > > +             .acpi_match_table =
> > > ACPI_PTR(mt8901_pinctrl_acpi_match),
> > 
> > Please also add support for devicetree - I have a hunch (and I'm
> > sure
> > that I am
> > not the only one) that ACPI may give some issues at the end of the
> > day, on ARM64.
> > 
> > Of course, I'd hope that ACPI is all good on this platform, but
> > still.... :-)
> > 
> > static const struct of_device_id mt8901_pinctrl_of_match[] = {
> >         { .compatible = "mediatek,mt8901-pinctrl", .data =
> > &mt8901_data },
> >         { /* sentinel */ }
> > };
> > 
> >         .of_match_table = mt8901_pinctrl_of_match,
> > 
> > > +             .pm = pm_sleep_ptr(&mtk_paris_pinctrl_pm_ops)
> > > +     },
> > > +     .probe = mtk_paris_pinctrl_probe,
> > > +};
> > 
> 
> Hi Deep,
> 
> Could you please check and feedback to Angelo?
> 
> Regards,
> Fred-WY Chen
> 
> > Cheers,
> > Angelo
> > 
> > > +
> > > +static int __init mt8901_pinctrl_init(void)
> > > +{
> > > +     return platform_driver_register(&mt8901_pinctrl_driver);
> > > +}
> > > +
> > > +arch_initcall(mt8901_pinctrl_init);
> > > +
> > > +MODULE_LICENSE("GPL");
> > > +MODULE_DESCRIPTION("MediaTek MT8901 Pinctrl Driver");
> 


^ permalink raw reply

* Re: [PATCH v2 0/3] Inline helpers into Rust without full LTO
From: Arnd Bergmann @ 2026-03-27 13:41 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Russell King, Christian Schrefl, Miguel Ojeda, Alice Ryhl,
	Ard Biesheuvel, Jamie Cunliffe, Will Deacon, Catalin Marinas,
	Miguel Ojeda, Andreas Hindborg, acourbot, Andrew Morton,
	Anton Ivanov, Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	David Gow, Gary Guo, Johannes Berg, Justin Stitt,
	linux-arm-kernel, linux-kbuild, linux-kernel, linux-mm, linux-um,
	llvm, Benno Lossin, Mark Rutland, mmaurer, Bill Wendling,
	Nathan Chancellor, Nick Desaulniers, Nicolas Schier,
	Nicolas Schier, Peter Zijlstra, Richard Weinberger,
	rust-for-linux, Trevor Gross, Uladzislau Rezki (Sony),
	John Paul Adrian Glaubitz
In-Reply-To: <93439e91-cf81-477b-b880-a813bb01ad7c@app.fastmail.com>

On Fri, Mar 27, 2026, at 10:02, Arnd Bergmann wrote:
> On Fri, Mar 27, 2026, at 08:56, Geert Uytterhoeven wrote:
> but that only allowed bitfields to be marked as __attribute__((packed))
> in order to get tightly packed fields and return '4' on all architectures,
> while m68k-linux-gcc apparently has all bitfields implicitly packed unless they
> are explicitly marked __attribute__((aligned(x))). This behavior is
> independent of the -malign-int flag.

I had another look and found that this has been in gcc since ELF
support was originally added for m68k:

gcc/config/m68k/linux.h:#undef PCC_BITFIELD_TYPE_MATTERS

All other current Linux/ELF targets get the default from gcc/config/elfos.h

      Arnd


^ permalink raw reply

* [PATCH v2] ASoC: dt-bindings: mediatek,mt8173-rt5650-rt5514: convert to DT schema
From: Khushal Chitturi @ 2026-03-27 13:46 UTC (permalink / raw)
  To: lgirdwood, broonie
  Cc: robh, krzk+dt, conor+dt, matthias.bgg, angelogioacchino.delregno,
	koro.chen, linux-sound, devicetree, linux-kernel,
	linux-arm-kernel, linux-mediatek, Khushal Chitturi

Convert the Mediatek MT8173 with RT5650 and RT5514 sound card
bindings to DT schema.

Signed-off-by: Khushal Chitturi <khushalchitturi@gmail.com>
---
Changelog:
v1 -> v2:
- Used two separate entries for two phandles.
- corrected positioning of additionalProperties.
- Fixed commit message to match subsystem.

Note:
* This patch is part of the GSoC2026 application process for device tree bindings conversions
* https://github.com/LinuxFoundationGSoC/ProjectIdeas/wiki/GSoC-2026-Device-Tree-Bindings

 .../sound/mediatek,mt8173-rt5650-rt5514.yaml  | 41 +++++++++++++++++++
 .../bindings/sound/mt8173-rt5650-rt5514.txt   | 15 -------
 2 files changed, 41 insertions(+), 15 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml
 delete mode 100644 Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5514.txt

diff --git a/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml b/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml
new file mode 100644
index 000000000000..ed698c9ff42b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/mediatek,mt8173-rt5650-rt5514.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT8173 with RT5650 and RT5514 audio codecs
+
+maintainers:
+  - Koro Chen <koro.chen@mediatek.com>
+
+properties:
+  compatible:
+    const: mediatek,mt8173-rt5650-rt5514
+
+  mediatek,audio-codec:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description: Phandles of rt5650 and rt5514 codecs
+    items:
+      - description: phandle of rt5650 codec
+      - description: phandle of rt5514 codec
+
+  mediatek,platform:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: The phandle of MT8173 ASoC platform.
+
+required:
+  - compatible
+  - mediatek,audio-codec
+  - mediatek,platform
+
+additionalProperties: false
+
+examples:
+  - |
+    sound {
+        compatible = "mediatek,mt8173-rt5650-rt5514";
+        mediatek,audio-codec = <&rt5650>, <&rt5514>;
+        mediatek,platform = <&afe>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5514.txt b/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5514.txt
deleted file mode 100644
index e8b3c80c6fff..000000000000
--- a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5514.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-MT8173 with RT5650 RT5514 CODECS
-
-Required properties:
-- compatible : "mediatek,mt8173-rt5650-rt5514"
-- mediatek,audio-codec: the phandles of rt5650 and rt5514 codecs
-- mediatek,platform: the phandle of MT8173 ASoC platform
-
-Example:
-
-	sound {
-		compatible = "mediatek,mt8173-rt5650-rt5514";
-		mediatek,audio-codec = <&rt5650 &rt5514>;
-		mediatek,platform = <&afe>;
-	};
-
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH 4/5] xor/arm64: Use shared NEON intrinsics implementation from 32-bit ARM
From: Christoph Hellwig @ 2026-03-27 13:50 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: linux-raid, linux-arm-kernel, linux-crypto, Ard Biesheuvel,
	Christoph Hellwig, Russell King, Arnd Bergmann, Eric Biggers
In-Reply-To: <20260327113047.4043492-11-ardb+git@google.com>

On Fri, Mar 27, 2026 at 12:30:52PM +0100, Ard Biesheuvel wrote:
> From: Ard Biesheuvel <ardb@kernel.org>
> 
> Tweak the arm64 code so that the pure NEON intrinsics implementation of
> XOR is shared between arm64 and ARM.

Instead of hiding the implementation in a header, just split xor-neon.c
into two .c files, one of which could be built by arm32 as well, probably
in the arm/ instead of the arm64/ subdirectory, but we can also add a
new arm-common one if that's what the arm maintainers prefer.

^ permalink raw reply

* Re: [PATCH v20 06/10] power: reset: Add psci-reboot-mode driver
From: Lorenzo Pieralisi @ 2026-03-27 13:55 UTC (permalink / raw)
  To: Shivendra Pratap
  Cc: Arnd Bergmann, Bjorn Andersson, Sebastian Reichel, Rob Herring,
	Souvik Chakravarty, Krzysztof Kozlowski, Andy Yan,
	Matthias Brugger, Mark Rutland, Conor Dooley, Konrad Dybcio,
	John Stultz, Moritz Fischer, Bartosz Golaszewski, Sudeep Holla,
	Florian Fainelli, Krzysztof Kozlowski, Dmitry Baryshkov,
	Mukesh Ojha, Andre Draszik, Kathiravan Thirumoorthy, linux-pm,
	linux-kernel, linux-arm-kernel, linux-arm-msm, devicetree,
	Srinivas Kandagatla
In-Reply-To: <20260304-arm-psci-system_reset2-vendor-reboots-v20-6-cf7d346b8372@oss.qualcomm.com>

On Wed, Mar 04, 2026 at 11:33:06PM +0530, Shivendra Pratap wrote:
> PSCI supports different types of resets like COLD reset, ARCH WARM
> reset, vendor-specific resets. Currently there is no common driver that
> handles all supported psci resets at one place. Additionally, there is
> no common mechanism to issue the supported psci resets from userspace.
> 
> Add a PSCI reboot mode driver and define two types of PSCI resets in the
> driver as reboot-modes: predefined resets controlled by Linux
> reboot_mode and customizable resets defined by SoC vendors in their
> device tree under the psci:reboot-mode node.
> 
> Register the driver with the reboot-mode framework to interface these
> resets to userspace. When userspace initiates a supported command, pass
> the reset arguments to the PSCI driver to enable command-based reset.
> 
> This change allows userspace to issue supported PSCI reset commands
> using the standard reboot system calls while enabling SoC vendors to
> define their specific resets for PSCI.
> 
> Signed-off-by: Shivendra Pratap <shivendra.pratap@oss.qualcomm.com>
> ---
>  drivers/power/reset/Kconfig            |  10 +++
>  drivers/power/reset/Makefile           |   1 +
>  drivers/power/reset/psci-reboot-mode.c | 119 +++++++++++++++++++++++++++++++++
>  3 files changed, 130 insertions(+)
> 
> diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
> index f6c1bcbb57deff3568d6b1b326454add3b3bbf06..529d6c7d3555601f7b7e6199acd29838030fcef2 100644
> --- a/drivers/power/reset/Kconfig
> +++ b/drivers/power/reset/Kconfig
> @@ -348,6 +348,16 @@ config NVMEM_REBOOT_MODE
>  	  then the bootloader can read it and take different
>  	  action according to the mode.
>  
> +config PSCI_REBOOT_MODE
> +	bool "PSCI reboot mode driver"
> +	depends on OF && ARM_PSCI_FW
> +	select REBOOT_MODE
> +	help
> +	  Say y here will enable PSCI reboot mode driver. This gets
> +          the PSCI reboot mode arguments and passes them to psci
> +	  driver. psci driver uses these arguments for issuing
> +	  device reset into different boot states.
> +
>  config POWER_MLXBF
>  	tristate "Mellanox BlueField power handling driver"
>  	depends on (GPIO_MLXBF2 || GPIO_MLXBF3) && ACPI
> diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
> index 0e4ae6f6b5c55729cf60846d47e6fe0fec24f3cc..49774b42cdf61fd57a5b70f286c65c9d66bbc0cb 100644
> --- a/drivers/power/reset/Makefile
> +++ b/drivers/power/reset/Makefile
> @@ -40,4 +40,5 @@ obj-$(CONFIG_REBOOT_MODE) += reboot-mode.o
>  obj-$(CONFIG_SYSCON_REBOOT_MODE) += syscon-reboot-mode.o
>  obj-$(CONFIG_POWER_RESET_SC27XX) += sc27xx-poweroff.o
>  obj-$(CONFIG_NVMEM_REBOOT_MODE) += nvmem-reboot-mode.o
> +obj-$(CONFIG_PSCI_REBOOT_MODE) += psci-reboot-mode.o
>  obj-$(CONFIG_POWER_MLXBF) += pwr-mlxbf.o
> diff --git a/drivers/power/reset/psci-reboot-mode.c b/drivers/power/reset/psci-reboot-mode.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..86bef195228b0924704c2936b99f6801c14ff1b1
> --- /dev/null
> +++ b/drivers/power/reset/psci-reboot-mode.c
> @@ -0,0 +1,119 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
> + */
> +
> +#include <linux/device/faux.h>
> +#include <linux/device.h>

Nit: swap the two.

> +#include <linux/err.h>
> +#include <linux/of.h>
> +#include <linux/psci.h>
> +#include <linux/reboot.h>
> +#include <linux/reboot-mode.h>
> +#include <linux/types.h>
> +
> +/*
> + * Predefined reboot-modes are defined as per the values
> + * of enum reboot_mode defined in the kernel: reboot.c.
> + */
> +static struct mode_info psci_resets[] = {
> +	{ .mode = "warm", .magic = REBOOT_WARM},
> +	{ .mode = "soft", .magic = REBOOT_SOFT},
> +	{ .mode = "cold", .magic = REBOOT_COLD},
> +};
> +
> +static void psci_reboot_mode_set_predefined_modes(struct reboot_mode_driver *reboot)
> +{
> +	INIT_LIST_HEAD(&reboot->predefined_modes);
> +	for (u32 i = 0; i < ARRAY_SIZE(psci_resets); i++) {
> +		/* Prepare the magic with arg1 as 0 and arg2 as per pre-defined mode */
> +		psci_resets[i].magic = REBOOT_MODE_MAGIC(0, psci_resets[i].magic);

This looks weird to me, why can't we just initialize the array with the values
directly ?

> +		INIT_LIST_HEAD(&psci_resets[i].list);
> +		list_add_tail(&psci_resets[i].list, &reboot->predefined_modes);
> +	}
> +}
> +
> +/*
> + * arg1 is reset_type(Low 32 bit of magic).
> + * arg2 is cookie(High 32 bit of magic).
> + * If reset_type is 0, cookie will be used to decide the reset command.
> + */
> +static int psci_reboot_mode_write(struct reboot_mode_driver *reboot, u64 magic)
> +{
> +	u32 reset_type = REBOOT_MODE_ARG1(magic);
> +	u32 cookie = REBOOT_MODE_ARG2(magic);
> +
> +	if (reset_type == 0) {
> +		if (cookie == REBOOT_WARM || cookie == REBOOT_SOFT)
> +			psci_set_reset_cmd(true, 0, 0);
> +		else
> +			psci_set_reset_cmd(false, 0, 0);
> +	} else {
> +		psci_set_reset_cmd(true, reset_type, cookie);
> +	}

I don't think that psci_set_reset_cmd() has the right interface (and this
nested if is too complicated for my taste). All we need to pass is reset-type
and cookie (and if the reset is one of the predefined ones, reset-type is 0
and cookie is the REBOOT_* cookie).

Then the PSCI firmware driver will take the action according to what
resets are available.

How does it sound ?

> +
> +	return NOTIFY_DONE;
> +}
> +
> +static int psci_reboot_mode_register_device(struct faux_device *fdev)
> +{
> +	struct reboot_mode_driver *reboot;
> +	int ret;
> +
> +	reboot = devm_kzalloc(&fdev->dev, sizeof(*reboot), GFP_KERNEL);
> +	if (!reboot)
> +		return -ENOMEM;
> +
> +	psci_reboot_mode_set_predefined_modes(reboot);
> +	reboot->write = psci_reboot_mode_write;
> +	reboot->dev = &fdev->dev;
> +
> +	ret = devm_reboot_mode_register(&fdev->dev, reboot);
> +	if (ret) {
> +		dev_err_probe(&fdev->dev, ret, "devm_reboot_mode_register failed %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int __init psci_reboot_mode_init(void)
> +{
> +	struct device_node *psci_np;
> +	struct faux_device *fdev;
> +	struct device_node *np;
> +	int ret;
> +
> +	psci_np = of_find_compatible_node(NULL, NULL, "arm,psci-1.0");
> +	if (!psci_np)
> +		return -ENODEV;
> +	/*
> +	 * Look for reboot-mode in the psci node. Even if the reboot-mode
> +	 * node is not defined in psci, continue to register with the
> +	 * reboot-mode driver and let the dev.ofnode be set as NULL.
> +	 */
> +	np = of_find_node_by_name(psci_np, "reboot-mode");
> +
> +	fdev = faux_device_create("psci-reboot-mode", NULL, NULL);

Same comment as Bartosz (have you picked up his work, and are you working
towards a solution)?

Thanks,
Lorenzo

> +	if (!fdev) {
> +		ret = -ENODEV;
> +		goto error;
> +	}
> +
> +	device_set_node(&fdev->dev, of_fwnode_handle(np));
> +	ret = psci_reboot_mode_register_device(fdev);
> +	if (ret)
> +		goto error;
> +
> +	return 0;
> +
> +error:
> +	of_node_put(np);
> +	if (fdev) {
> +		device_set_node(&fdev->dev, NULL);
> +		faux_device_destroy(fdev);
> +	}
> +
> +	return ret;
> +}
> +device_initcall(psci_reboot_mode_init);
> 
> -- 
> 2.34.1
> 


^ permalink raw reply

* Re: (subset) [PATCH v17 0/8] support FEAT_LSUI
From: Yeoreum Yun @ 2026-03-27 13:56 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arm-kernel, linux-kernel, kvmarm, kvm, linux-kselftest,
	will, maz, oupton, miko.lenczewski, kevin.brodsky, broonie, ardb,
	suzuki.poulose, lpieralisi, joey.gouly, yuzenghui
In-Reply-To: <177461632621.2272468.5197255307509898250.b4-ty@arm.com>

Hi Catalin,

> On Sat, 14 Mar 2026 17:51:25 +0000, Yeoreum Yun wrote:
> > Since Armv9.6, FEAT_LSUI supplies the load/store instructions for
> > privileged level to access user memory without clearing
> > PSTATE.PAN bit.
> >
> > This patchset supports FEAT_LSUI and applies it mainly in
> > futex atomic operation and others.
> >
> > [...]
>
> Applied to arm64 (for-next/feat_lsui), thanks!

Thanks!

>
> I decided to drop patch [6/8] (arm64: armv8_deprecated: disable swp
> emulation when FEAT_LSUI present). The way FEAT_LSUI support looks now,
> we still have uaccess_enable_privileged() working properly and we could
> even support SWP emulation using exclusives. While it's highly unlikely
> to see both 32-bit EL0 and FEAT_LSUI in practice,

This is one of the decisive reasons to drop the swp emulation with LSUI
(https://lore.kernel.org/all/aXDbBKhE1SdCW6q4@willie-the-truck/)

However,

> models may support the
> combination and disabling SWP emulation feels pretty artificial.

But I'm not sure this is a sufficient rationale for supporting SWP with LSUI,
since it's highly unlikely to encounter a real CPU that supports both 32-bit EL0
and FEAT_LSUI.

Anyway, it's fair enough to drop 6/8 right now.
But I agree that it would be good to support SWP emulation with
LSUI, so let me respin the former patch for it.

[...]

--
Sincerely,
Yeoreum Yun


^ permalink raw reply

* Re: [PATCH v2] ASoC: dt-bindings: mediatek,mt8173-rt5650-rt5514: convert to DT schema
From: Krzysztof Kozlowski @ 2026-03-27 13:57 UTC (permalink / raw)
  To: Khushal Chitturi, lgirdwood, broonie
  Cc: robh, krzk+dt, conor+dt, matthias.bgg, angelogioacchino.delregno,
	koro.chen, linux-sound, devicetree, linux-kernel,
	linux-arm-kernel, linux-mediatek
In-Reply-To: <20260327134649.31376-1-khushalchitturi@gmail.com>

On 27/03/2026 14:46, Khushal Chitturi wrote:
> Convert the Mediatek MT8173 with RT5650 and RT5514 sound card
> bindings to DT schema.
> 
> Signed-off-by: Khushal Chitturi <khushalchitturi@gmail.com>
> ---

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

Best regards,
Krzysztof


^ permalink raw reply

* Re: [PATCH v2 1/2] soc: xilinx: Fix race condition in event registration
From: Michal Simek @ 2026-03-27 13:58 UTC (permalink / raw)
  To: Prasanna Kumar T S M, jay.buddhabhatti, marco.crivellari,
	tejas.patel, rajan.vaja, linux-arm-kernel, linux-kernel
In-Reply-To: <20260320060306.1540928-1-ptsm@linux.microsoft.com>



On 3/20/26 07:03, Prasanna Kumar T S M wrote:
> The zynqmp_power driver registers handlers for suspend and subsystem
> restart events using register_event(). However, the work structures
> (zynqmp_pm_init_suspend_work and zynqmp_pm_init_restart_work) used by
> these handlers were allocated and initialized after the registration
> call.
> 
> This created a race window where, if the firmware triggered an event
> immediately after registration but before allocation, the callback
> (suspend_event_callback or subsystem_restart_event_callback) would
> dereference a NULL pointer in work_pending(), leading to a crash.
> 
> Fix this by allocating and initializing the work structures before
> registering the events.
> 
> Fixes: fcf544ac6439 ("soc: xilinx: Add cb event for subsystem restart")
> Signed-off-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
> ---
>   drivers/soc/xilinx/zynqmp_power.c | 43 ++++++++++++-------------------
>   1 file changed, 17 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c
> index 9085db1b480a..9dd938bd01d8 100644
> --- a/drivers/soc/xilinx/zynqmp_power.c
> +++ b/drivers/soc/xilinx/zynqmp_power.c
> @@ -303,18 +303,18 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
>   	 * is not available to use) or -ENODEV(Xilinx Event Manager not compiled),
>   	 * then use ipi-mailbox or interrupt method.
>   	 */
> +	zynqmp_pm_init_suspend_work = devm_kzalloc(&pdev->dev,
> +						   sizeof(struct zynqmp_pm_work_struct),
> +						   GFP_KERNEL);
> +	if (!zynqmp_pm_init_suspend_work)
> +		return -ENOMEM;
> +
> +	INIT_WORK(&zynqmp_pm_init_suspend_work->callback_work,
> +		  zynqmp_pm_init_suspend_work_fn);
> +
>   	ret = register_event(&pdev->dev, PM_INIT_SUSPEND_CB, 0, 0, false,
>   			     suspend_event_callback);
>   	if (!ret) {
> -		zynqmp_pm_init_suspend_work = devm_kzalloc(&pdev->dev,
> -							   sizeof(struct zynqmp_pm_work_struct),
> -							   GFP_KERNEL);
> -		if (!zynqmp_pm_init_suspend_work)
> -			return -ENOMEM;
> -
> -		INIT_WORK(&zynqmp_pm_init_suspend_work->callback_work,
> -			  zynqmp_pm_init_suspend_work_fn);
> -
>   		ret = zynqmp_pm_get_family_info(&pm_family_code);
>   		if (ret < 0)
>   			return ret;
> @@ -326,14 +326,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
>   		else
>   			return -ENODEV;
>   
> -		ret = register_event(&pdev->dev, PM_NOTIFY_CB, node_id, EVENT_SUBSYSTEM_RESTART,
> -				     false, subsystem_restart_event_callback);
> -		if (ret) {
> -			dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n",
> -				ret);
> -			return ret;
> -		}
> -
>   		zynqmp_pm_init_restart_work = devm_kzalloc(&pdev->dev,
>   							   sizeof(struct zynqmp_pm_work_struct),
>   							   GFP_KERNEL);
> @@ -342,19 +334,18 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
>   
>   		INIT_WORK(&zynqmp_pm_init_restart_work->callback_work,
>   			  zynqmp_pm_subsystem_restart_work_fn);
> +
> +		ret = register_event(&pdev->dev, PM_NOTIFY_CB, node_id, EVENT_SUBSYSTEM_RESTART,
> +				     false, subsystem_restart_event_callback);
> +		if (ret) {
> +			dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n",
> +				ret);
> +			return ret;
> +		}
>   	} else if (ret != -EACCES && ret != -ENODEV) {
>   		dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n", ret);
>   		return ret;
>   	} else if (of_property_present(pdev->dev.of_node, "mboxes")) {
> -		zynqmp_pm_init_suspend_work =
> -			devm_kzalloc(&pdev->dev,
> -				     sizeof(struct zynqmp_pm_work_struct),
> -				     GFP_KERNEL);
> -		if (!zynqmp_pm_init_suspend_work)
> -			return -ENOMEM;
> -
> -		INIT_WORK(&zynqmp_pm_init_suspend_work->callback_work,
> -			  zynqmp_pm_init_suspend_work_fn);
>   		client = devm_kzalloc(&pdev->dev, sizeof(*client), GFP_KERNEL);
>   		if (!client)
>   			return -ENOMEM;


Applied both.

Thanks,
Michal


^ permalink raw reply

* Re: [PATCH v20 06/10] power: reset: Add psci-reboot-mode driver
From: Bartosz Golaszewski @ 2026-03-27 13:59 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: Shivendra Pratap, Arnd Bergmann, Bjorn Andersson,
	Sebastian Reichel, Rob Herring, Souvik Chakravarty,
	Krzysztof Kozlowski, Andy Yan, Matthias Brugger, Mark Rutland,
	Conor Dooley, Konrad Dybcio, John Stultz, Moritz Fischer,
	Sudeep Holla, Florian Fainelli, Krzysztof Kozlowski,
	Dmitry Baryshkov, Mukesh Ojha, Andre Draszik,
	Kathiravan Thirumoorthy, linux-pm, linux-kernel, linux-arm-kernel,
	linux-arm-msm, devicetree, Srinivas Kandagatla
In-Reply-To: <acaMPgRALnoUIHMC@lpieralisi>

On Fri, Mar 27, 2026 at 2:55 PM Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
>
> > +
> > +static int __init psci_reboot_mode_init(void)
> > +{
> > +     struct device_node *psci_np;
> > +     struct faux_device *fdev;
> > +     struct device_node *np;
> > +     int ret;
> > +
> > +     psci_np = of_find_compatible_node(NULL, NULL, "arm,psci-1.0");
> > +     if (!psci_np)
> > +             return -ENODEV;
> > +     /*
> > +      * Look for reboot-mode in the psci node. Even if the reboot-mode
> > +      * node is not defined in psci, continue to register with the
> > +      * reboot-mode driver and let the dev.ofnode be set as NULL.
> > +      */
> > +     np = of_find_node_by_name(psci_np, "reboot-mode");
> > +
> > +     fdev = faux_device_create("psci-reboot-mode", NULL, NULL);
>
> Same comment as Bartosz (have you picked up his work and working towards
> a solution) ?
>

Hi Lorenzo!

Yes, I suggested creating an MFD driver binding to the "arm,psci-1.0"
compatible node which will have two cells: one for the existing
cpuidle-domain functionality and a second for the new reboot-mode
driver. This way we'll simply add a platform device as Greg suggested.

Bart


^ permalink raw reply

* [PATCH v4 00/38] KVM: arm64: Add support for protected guest memory with pKVM
From: Will Deacon @ 2026-03-27 13:59 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei

Hi again, folks,

Here's v4 of the pKVM protected memory patches previously posted here:

  v1: https://lore.kernel.org/kvmarm/20260105154939.11041-1-will@kernel.org/
  v2: https://lore.kernel.org/kvmarm/20260119124629.2563-1-will@kernel.org/
  v3: https://lore.kernel.org/r/20260305144351.17071-1-will@kernel.org

Changes since v3 include:

  * Rebased onto v7.0-rc4
  * Remove unused PKVM_ID_FFA
  * Make ARM_PKVM_GUEST depend on DMA_RESTRICTED_POOL
  * Use FAR_TO_FIPA_OFFSET() instead of open-coding it
  * Remove PROTECTED_VM_UAPI config option and update documentation

As before, I've pushed an updated branch with this series:

  https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=kvm/protected-memory

and the kvmtool patches are available at:

  https://git.kernel.org/pub/scm/linux/kernel/git/will/kvmtool.git/log/?h=pkvm

I fully expect to send a v5, as this is the first time Sashiko has had
a chance to chew on this and I'm expecting a roasting.

Cheers,

Will

Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oupton@kernel.org>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Zenghui Yu <yuzenghui@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Quentin Perret <qperret@google.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Cc: Mostafa Saleh <smostafa@google.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>

--->8

Fuad Tabba (1):
  KVM: arm64: Expose self-hosted debug regs as RAZ/WI for protected
    guests

Quentin Perret (1):
  KVM: arm64: Inject SIGSEGV on illegal accesses

Will Deacon (36):
  KVM: arm64: Remove unused PKVM_ID_FFA definition
  KVM: arm64: Don't leak stage-2 page-table if VM fails to init under
    pKVM
  KVM: arm64: Move handle check into pkvm_pgtable_stage2_destroy_range()
  KVM: arm64: Rename __pkvm_pgtable_stage2_unmap()
  KVM: arm64: Don't advertise unsupported features for protected guests
  KVM: arm64: Remove is_protected_kvm_enabled() checks from hypercalls
  KVM: arm64: Ignore MMU notifier callbacks for protected VMs
  KVM: arm64: Prevent unsupported memslot operations on protected VMs
  KVM: arm64: Ignore -EAGAIN when mapping in pages for the pKVM host
  KVM: arm64: Split teardown hypercall into two phases
  KVM: arm64: Introduce __pkvm_host_donate_guest()
  KVM: arm64: Hook up donation hypercall to pkvm_pgtable_stage2_map()
  KVM: arm64: Handle aborts from protected VMs
  KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page()
  KVM: arm64: Hook up reclaim hypercall to pkvm_pgtable_stage2_destroy()
  KVM: arm64: Factor out pKVM host exception injection logic
  KVM: arm64: Support translation faults in inject_host_exception()
  KVM: arm64: Avoid pointless annotation when mapping host-owned pages
  KVM: arm64: Generalise kvm_pgtable_stage2_set_owner()
  KVM: arm64: Introduce host_stage2_set_owner_metadata_locked()
  KVM: arm64: Change 'pkvm_handle_t' to u16
  KVM: arm64: Annotate guest donations with handle and gfn in host
    stage-2
  KVM: arm64: Introduce hypercall to force reclaim of a protected page
  KVM: arm64: Reclaim faulting page from pKVM in spurious fault handler
  KVM: arm64: Return -EFAULT from VCPU_RUN on access to a poisoned pte
  KVM: arm64: Add hvc handler at EL2 for hypercalls from protected VMs
  KVM: arm64: Implement the MEM_SHARE hypercall for protected VMs
  KVM: arm64: Implement the MEM_UNSHARE hypercall for protected VMs
  KVM: arm64: Allow userspace to create protected VMs when pKVM is
    enabled
  KVM: arm64: Add some initial documentation for pKVM
  KVM: arm64: Extend pKVM page ownership selftests to cover guest
    donation
  KVM: arm64: Register 'selftest_vm' in the VM table
  KVM: arm64: Extend pKVM page ownership selftests to cover forced
    reclaim
  KVM: arm64: Extend pKVM page ownership selftests to cover guest hvcs
  KVM: arm64: Rename PKVM_PAGE_STATE_MASK
  drivers/virt: pkvm: Add Kconfig dependency on DMA_RESTRICTED_POOL

 .../admin-guide/kernel-parameters.txt         |   4 +-
 Documentation/virt/kvm/arm/index.rst          |   1 +
 Documentation/virt/kvm/arm/pkvm.rst           | 106 ++++
 arch/arm64/include/asm/kvm_asm.h              |  31 +-
 arch/arm64/include/asm/kvm_host.h             |   9 +-
 arch/arm64/include/asm/kvm_pgtable.h          |  45 +-
 arch/arm64/include/asm/kvm_pkvm.h             |   4 +-
 arch/arm64/include/asm/virt.h                 |   9 +
 arch/arm64/kvm/arm.c                          |  12 +-
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  10 +-
 arch/arm64/kvm/hyp/include/nvhe/memory.h      |  12 +-
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h        |   7 +-
 .../arm64/kvm/hyp/include/nvhe/trap_handler.h |   2 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            | 184 +++---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 585 ++++++++++++++++--
 arch/arm64/kvm/hyp/nvhe/pkvm.c                | 224 ++++++-
 arch/arm64/kvm/hyp/nvhe/switch.c              |   1 +
 arch/arm64/kvm/hyp/nvhe/sys_regs.c            |   8 +
 arch/arm64/kvm/hyp/pgtable.c                  |  33 +-
 arch/arm64/kvm/mmu.c                          | 114 +++-
 arch/arm64/kvm/pkvm.c                         | 151 ++++-
 arch/arm64/mm/fault.c                         |  33 +-
 drivers/virt/coco/pkvm-guest/Kconfig          |   2 +-
 include/uapi/linux/kvm.h                      |   5 +
 24 files changed, 1365 insertions(+), 227 deletions(-)
 create mode 100644 Documentation/virt/kvm/arm/pkvm.rst

-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply

* [PATCH v4 01/38] KVM: arm64: Remove unused PKVM_ID_FFA definition
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

Commit 7cbf7c37718e ("KVM: arm64: Drop pkvm_mem_transition for host/hyp
sharing") removed the last users of PKVM_ID_FFA, so drop the definition
altogether.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 5f9d56754e39..7f25f2bca90c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -27,7 +27,6 @@ extern struct host_mmu host_mmu;
 enum pkvm_component_id {
 	PKVM_ID_HOST,
 	PKVM_ID_HYP,
-	PKVM_ID_FFA,
 };
 
 extern unsigned long hyp_nr_cpus;
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 02/38] KVM: arm64: Don't leak stage-2 page-table if VM fails to init under pKVM
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

If pkvm_init_host_vm() fails, we should free the stage-2 page-table
previously allocated by kvm_init_stage2_mmu().

Cc: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Fixes: 07aeb70707b1 ("KVM: arm64: Reserve pKVM handle during pkvm_init_host_vm()")
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/arm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 410ffd41fd73..3589fc08266c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -236,7 +236,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		 */
 		ret = pkvm_init_host_vm(kvm);
 		if (ret)
-			goto err_free_cpumask;
+			goto err_uninit_mmu;
 	}
 
 	kvm_vgic_early_init(kvm);
@@ -252,6 +252,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	return 0;
 
+err_uninit_mmu:
+	kvm_uninit_stage2_mmu(kvm);
 err_free_cpumask:
 	free_cpumask_var(kvm->arch.supported_cpus);
 err_unshare_kvm:
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 03/38] KVM: arm64: Move handle check into pkvm_pgtable_stage2_destroy_range()
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

When pKVM is enabled, a VM has a 'handle' allocated by the hypervisor
in kvm_arch_init_vm() and released later by kvm_arch_destroy_vm().

Consequently, the only time __pkvm_pgtable_stage2_unmap() can run into
an uninitialised 'handle' is on the kvm_arch_init_vm() failure path,
where we destroy the empty stage-2 page-table if we fail to allocate a
handle.

Move the handle check into pkvm_pgtable_stage2_destroy_range(), which
will additionally handle protected VMs in subsequent patches.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/pkvm.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index d7a0f69a9982..7797813f4dbe 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -329,9 +329,6 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
 	struct pkvm_mapping *mapping;
 	int ret;
 
-	if (!handle)
-		return 0;
-
 	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
 		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
 					mapping->nr_pages);
@@ -347,6 +344,12 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
 void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
 					u64 addr, u64 size)
 {
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	pkvm_handle_t handle = kvm->arch.pkvm.handle;
+
+	if (!handle)
+		return;
+
 	__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
 }
 
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 05/38] KVM: arm64: Don't advertise unsupported features for protected guests
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

Both SVE and PMUv3 are treated as "restricted" features for protected
guests and attempts to access their corresponding architectural state
from a protected guest result in an undefined exception being injected
by the hypervisor.

Since these exceptions are unexpected and typically fatal for the guest,
don't advertise these features for protected guests.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_pkvm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 757076ad4ec9..7041e398fb4c 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -40,8 +40,6 @@ static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPU_ID:
 	case KVM_CAP_MSI_DEVID:
 	case KVM_CAP_ARM_VM_IPA_SIZE:
-	case KVM_CAP_ARM_PMU_V3:
-	case KVM_CAP_ARM_SVE:
 	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
 	case KVM_CAP_ARM_PTRAUTH_GENERIC:
 		return true;
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 06/38] KVM: arm64: Expose self-hosted debug regs as RAZ/WI for protected guests
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

From: Fuad Tabba <tabba@google.com>

Debug and trace are not currently supported for protected guests, so
trap accesses to the related registers and emulate them as RAZ/WI for
now. Although this isn't strictly compatible with the architecture, it's
sufficient for Linux guests and means that debug support can be added
later on.

Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/hyp/nvhe/sys_regs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 06d28621722e..0a84140afa28 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -392,6 +392,14 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
 	/* Cache maintenance by set/way operations are restricted. */
 
 	/* Debug and Trace Registers are restricted. */
+	RAZ_WI(SYS_DBGBVRn_EL1(0)),
+	RAZ_WI(SYS_DBGBCRn_EL1(0)),
+	RAZ_WI(SYS_DBGWVRn_EL1(0)),
+	RAZ_WI(SYS_DBGWCRn_EL1(0)),
+	RAZ_WI(SYS_MDSCR_EL1),
+	RAZ_WI(SYS_OSLAR_EL1),
+	RAZ_WI(SYS_OSLSR_EL1),
+	RAZ_WI(SYS_OSDLR_EL1),
 
 	/* Group 1 ID registers */
 	HOST_HANDLED(SYS_REVIDR_EL1),
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 04/38] KVM: arm64: Rename __pkvm_pgtable_stage2_unmap()
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

In preparation for adding support for protected VMs, where pages are
donated rather than shared, rename __pkvm_pgtable_stage2_unmap() to
__pkvm_pgtable_stage2_unshare() to make it clearer what is going on.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/pkvm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 7797813f4dbe..42f6e50825ac 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -322,7 +322,7 @@ int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 	return 0;
 }
 
-static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
+static int __pkvm_pgtable_stage2_unshare(struct kvm_pgtable *pgt, u64 start, u64 end)
 {
 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
 	pkvm_handle_t handle = kvm->arch.pkvm.handle;
@@ -350,7 +350,7 @@ void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
 	if (!handle)
 		return;
 
-	__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+	__pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
 }
 
 void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
@@ -386,7 +386,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 			return -EAGAIN;
 
 		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
-		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+		ret = __pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
 		if (ret)
 			return ret;
 		mapping = NULL;
@@ -409,7 +409,7 @@ int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
 	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
 
-	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+	return __pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
 }
 
 int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 07/38] KVM: arm64: Remove is_protected_kvm_enabled() checks from hypercalls
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

When pKVM is not enabled, the host shouldn't issue pKVM-specific
hypercalls and so there's no point checking for this in the pKVM
hypercall handlers.

Remove the redundant is_protected_kvm_enabled() checks from each
hypercall and instead rejig the hypercall table so that the
pKVM-specific hypercalls are unreachable when pKVM is not being used.

Reviewed-by: Quentin Perret <qperret@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h   | 24 +++++++-----
 arch/arm64/kvm/hyp/nvhe/hyp-main.c | 63 ++++++++++--------------------
 2 files changed, 34 insertions(+), 53 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index a1ad12c72ebf..7b72aac4730d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -51,7 +51,7 @@
 #include <linux/mm.h>
 
 enum __kvm_host_smccc_func {
-	/* Hypercalls available only prior to pKVM finalisation */
+	/* Hypercalls that are unavailable once pKVM has finalised. */
 	/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
 	__KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
 	__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
@@ -60,16 +60,9 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
 	__KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
+	__KVM_HOST_SMCCC_FUNC_MIN_PKVM = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
 
-	/* Hypercalls available after pKVM finalisation */
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
+	/* Hypercalls that are always available and common to [nh]VHE/pKVM. */
 	__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
 	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -81,6 +74,17 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+	__KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM = __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+
+	/* Hypercalls that are available only when pKVM has finalised. */
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
+	__KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
 	__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index e7790097db93..127decc2dd2b 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -169,9 +169,6 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
 	DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
 	struct pkvm_hyp_vcpu *hyp_vcpu;
 
-	if (!is_protected_kvm_enabled())
-		return;
-
 	hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
 	if (!hyp_vcpu)
 		return;
@@ -188,12 +185,8 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
 
 static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
 {
-	struct pkvm_hyp_vcpu *hyp_vcpu;
+	struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
 
-	if (!is_protected_kvm_enabled())
-		return;
-
-	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
 	if (hyp_vcpu)
 		pkvm_put_hyp_vcpu(hyp_vcpu);
 }
@@ -257,9 +250,6 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
 	struct pkvm_hyp_vcpu *hyp_vcpu;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
 		goto out;
@@ -281,9 +271,6 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
 	struct pkvm_hyp_vm *hyp_vm;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vm = get_np_pkvm_hyp_vm(handle);
 	if (!hyp_vm)
 		goto out;
@@ -301,9 +288,6 @@ static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ct
 	struct pkvm_hyp_vcpu *hyp_vcpu;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
 		goto out;
@@ -321,9 +305,6 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt
 	struct pkvm_hyp_vm *hyp_vm;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vm = get_np_pkvm_hyp_vm(handle);
 	if (!hyp_vm)
 		goto out;
@@ -343,9 +324,6 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho
 	struct pkvm_hyp_vm *hyp_vm;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vm = get_np_pkvm_hyp_vm(handle);
 	if (!hyp_vm)
 		goto out;
@@ -362,9 +340,6 @@ static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
 	struct pkvm_hyp_vcpu *hyp_vcpu;
 	int ret = -EINVAL;
 
-	if (!is_protected_kvm_enabled())
-		goto out;
-
 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
 		goto out;
@@ -424,12 +399,8 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
-	struct pkvm_hyp_vm *hyp_vm;
+	struct pkvm_hyp_vm *hyp_vm = get_np_pkvm_hyp_vm(handle);
 
-	if (!is_protected_kvm_enabled())
-		return;
-
-	hyp_vm = get_np_pkvm_hyp_vm(handle);
 	if (!hyp_vm)
 		return;
 
@@ -603,14 +574,6 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_get_gic_config),
 	HANDLE_FUNC(__pkvm_prot_finalize),
 
-	HANDLE_FUNC(__pkvm_host_share_hyp),
-	HANDLE_FUNC(__pkvm_host_unshare_hyp),
-	HANDLE_FUNC(__pkvm_host_share_guest),
-	HANDLE_FUNC(__pkvm_host_unshare_guest),
-	HANDLE_FUNC(__pkvm_host_relax_perms_guest),
-	HANDLE_FUNC(__pkvm_host_wrprotect_guest),
-	HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
-	HANDLE_FUNC(__pkvm_host_mkyoung_guest),
 	HANDLE_FUNC(__kvm_adjust_pc),
 	HANDLE_FUNC(__kvm_vcpu_run),
 	HANDLE_FUNC(__kvm_flush_vm_context),
@@ -622,6 +585,15 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_timer_set_cntvoff),
 	HANDLE_FUNC(__vgic_v3_save_aprs),
 	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
+
+	HANDLE_FUNC(__pkvm_host_share_hyp),
+	HANDLE_FUNC(__pkvm_host_unshare_hyp),
+	HANDLE_FUNC(__pkvm_host_share_guest),
+	HANDLE_FUNC(__pkvm_host_unshare_guest),
+	HANDLE_FUNC(__pkvm_host_relax_perms_guest),
+	HANDLE_FUNC(__pkvm_host_wrprotect_guest),
+	HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
+	HANDLE_FUNC(__pkvm_host_mkyoung_guest),
 	HANDLE_FUNC(__pkvm_reserve_vm),
 	HANDLE_FUNC(__pkvm_unreserve_vm),
 	HANDLE_FUNC(__pkvm_init_vm),
@@ -635,7 +607,7 @@ static const hcall_t host_hcall[] = {
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(unsigned long, id, host_ctxt, 0);
-	unsigned long hcall_min = 0;
+	unsigned long hcall_min = 0, hcall_max = -1;
 	hcall_t hfn;
 
 	/*
@@ -647,14 +619,19 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
 	 * basis. This is all fine, however, since __pkvm_prot_finalize
 	 * returns -EPERM after the first call for a given CPU.
 	 */
-	if (static_branch_unlikely(&kvm_protected_mode_initialized))
-		hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
+	if (static_branch_unlikely(&kvm_protected_mode_initialized)) {
+		hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM;
+	} else {
+		hcall_max = __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM;
+	}
 
 	id &= ~ARM_SMCCC_CALL_HINTS;
 	id -= KVM_HOST_SMCCC_ID(0);
 
-	if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
+	if (unlikely(id < hcall_min || id > hcall_max ||
+		     id >= ARRAY_SIZE(host_hcall))) {
 		goto inval;
+	}
 
 	hfn = host_hcall[id];
 	if (unlikely(!hfn))
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

* [PATCH v4 08/38] KVM: arm64: Ignore MMU notifier callbacks for protected VMs
From: Will Deacon @ 2026-03-27 14:00 UTC (permalink / raw)
  To: kvmarm
  Cc: linux-arm-kernel, Will Deacon, Marc Zyngier, Oliver Upton,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Quentin Perret, Fuad Tabba, Vincent Donnefort, Mostafa Saleh,
	Alexandru Elisei
In-Reply-To: <20260327140039.21228-1-will@kernel.org>

In preparation for supporting the donation of pinned pages to protected
VMs, return early from the MMU notifiers when called for a protected VM,
as the necessary hypercalls are exposed only for non-protected guests.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/mmu.c  |  9 ++++++---
 arch/arm64/kvm/pkvm.c | 19 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 17d64a1e11e5..5e7821fe0fc4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -340,6 +340,9 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
 void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
 			    u64 size, bool may_block)
 {
+	if (kvm_vm_is_protected(kvm_s2_mmu_to_kvm(mmu)))
+		return;
+
 	__unmap_stage2_range(mmu, start, size, may_block);
 }
 
@@ -2223,7 +2226,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (!kvm->arch.mmu.pgt)
+	if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
 		return false;
 
 	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
@@ -2238,7 +2241,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	u64 size = (range->end - range->start) << PAGE_SHIFT;
 
-	if (!kvm->arch.mmu.pgt)
+	if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
 		return false;
 
 	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
@@ -2254,7 +2257,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	u64 size = (range->end - range->start) << PAGE_SHIFT;
 
-	if (!kvm->arch.mmu.pgt)
+	if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
 		return false;
 
 	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 42f6e50825ac..20d50abb3b94 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -407,7 +407,12 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 
 int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
-	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+
+	if (WARN_ON(kvm_vm_is_protected(kvm)))
+		return -EPERM;
+
+	lockdep_assert_held_write(&kvm->mmu_lock);
 
 	return __pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
 }
@@ -419,6 +424,9 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 	struct pkvm_mapping *mapping;
 	int ret = 0;
 
+	if (WARN_ON(kvm_vm_is_protected(kvm)))
+		return -EPERM;
+
 	lockdep_assert_held(&kvm->mmu_lock);
 	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
 		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
@@ -450,6 +458,9 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
 	struct pkvm_mapping *mapping;
 	bool young = false;
 
+	if (WARN_ON(kvm_vm_is_protected(kvm)))
+		return -EPERM;
+
 	lockdep_assert_held(&kvm->mmu_lock);
 	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
 		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
@@ -461,12 +472,18 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
 int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
 				    enum kvm_pgtable_walk_flags flags)
 {
+	if (WARN_ON(kvm_vm_is_protected(kvm_s2_mmu_to_kvm(pgt->mmu))))
+		return -EPERM;
+
 	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
 }
 
 void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
 				 enum kvm_pgtable_walk_flags flags)
 {
+	if (WARN_ON(kvm_vm_is_protected(kvm_s2_mmu_to_kvm(pgt->mmu))))
+		return;
+
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
 }
 
-- 
2.53.0.1018.g2bb0e51243-goog



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox