Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH net-next v7 01/12] net: phylink: keep and use MAC supported_interfaces in phylink struct
From: Christian Marangi @ 2026-06-15 12:29 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Simon Horman, Jonathan Corbet, Shuah Khan, Christian Marangi,
	Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
	Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
	linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260615122950.22281-1-ansuelsmth@gmail.com>

Add in phylink struct a copy of supported_interfaces from phylink_config
and make use of that instead of relying on phylink_config value.

This is in preparation for support of PCS handling internally to phylink
where a PCS can be removed or added after the phylink is created and we
need both a reference of the supported_interfaces value from
phylink_config and an internal value that can be updated with the new
PCS info.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 drivers/net/phy/phylink.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 087ac63f9193..4d59c0dd78db 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -60,6 +60,11 @@ struct phylink {
 	/* The link configuration settings */
 	struct phylink_link_state link_config;
 
+	/* Which interfaces are supported by the current link.
+	 * Can change on removal or addition of new PCS.
+	 */
+	DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
+
 	/* The current settings */
 	phy_interface_t cur_interface;
 
@@ -629,7 +634,7 @@ static int phylink_validate_mask(struct phylink *pl, struct phy_device *phy,
 static int phylink_validate(struct phylink *pl, unsigned long *supported,
 			    struct phylink_link_state *state)
 {
-	const unsigned long *interfaces = pl->config->supported_interfaces;
+	const unsigned long *interfaces = pl->supported_interfaces;
 
 	if (state->interface == PHY_INTERFACE_MODE_NA)
 		return phylink_validate_mask(pl, NULL, supported, state,
@@ -1868,6 +1873,9 @@ struct phylink *phylink_create(struct phylink_config *config,
 	mutex_init(&pl->state_mutex);
 	INIT_WORK(&pl->resolve, phylink_resolve);
 
+	phy_interface_copy(pl->supported_interfaces,
+			   config->supported_interfaces);
+
 	pl->config = config;
 	if (config->type == PHYLINK_NETDEV) {
 		pl->netdev = to_net_dev(config->dev);
@@ -2026,7 +2034,7 @@ static int phylink_validate_phy(struct phylink *pl, struct phy_device *phy,
 		 * those which the host supports.
 		 */
 		phy_interface_and(interfaces, phy->possible_interfaces,
-				  pl->config->supported_interfaces);
+				  pl->supported_interfaces);
 
 		if (phy_interface_empty(interfaces)) {
 			phylink_err(pl, "PHY has no common interfaces\n");
@@ -2828,12 +2836,12 @@ static phy_interface_t phylink_sfp_select_interface(struct phylink *pl,
 		return interface;
 	}
 
-	if (!test_bit(interface, pl->config->supported_interfaces)) {
+	if (!test_bit(interface, pl->supported_interfaces)) {
 		phylink_err(pl,
 			    "selection of interface failed, SFP selected %s (%u) but MAC supports %*pbl\n",
 			    phy_modes(interface), interface,
 			    (int)PHY_INTERFACE_MODE_MAX,
-			    pl->config->supported_interfaces);
+			    pl->supported_interfaces);
 		return PHY_INTERFACE_MODE_NA;
 	}
 
@@ -3761,14 +3769,14 @@ static int phylink_sfp_config_optical(struct phylink *pl)
 
 	phylink_dbg(pl, "optical SFP: interfaces=[mac=%*pbl, sfp=%*pbl]\n",
 		    (int)PHY_INTERFACE_MODE_MAX,
-		    pl->config->supported_interfaces,
+		    pl->supported_interfaces,
 		    (int)PHY_INTERFACE_MODE_MAX,
 		    pl->sfp_interfaces);
 
 	/* Find the union of the supported interfaces by the PCS/MAC and
 	 * the SFP module.
 	 */
-	phy_interface_and(pl->sfp_interfaces, pl->config->supported_interfaces,
+	phy_interface_and(pl->sfp_interfaces, pl->supported_interfaces,
 			  pl->sfp_interfaces);
 	if (phy_interface_empty(pl->sfp_interfaces)) {
 		phylink_err(pl, "unsupported SFP module: no common interface modes\n");
@@ -3939,7 +3947,7 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
 
 	/* Set the PHY's host supported interfaces */
 	phy_interface_and(phy->host_interfaces, phylink_sfp_interfaces,
-			  pl->config->supported_interfaces);
+			  pl->supported_interfaces);
 
 	/* Do the initial configuration */
 	return phylink_sfp_config_phy(pl, phy);
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH net-next v7 05/12] net: phylink: support late PCS provider attach
From: Maxime Chevallier @ 2026-06-15 14:07 UTC (permalink / raw)
  To: Christian Marangi, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Simon Horman, Jonathan Corbet, Shuah Khan,
	Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
	Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
	linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260615122950.22281-6-ansuelsmth@gmail.com>

Hi Christian,

On 6/15/26 14:29, Christian Marangi wrote:
> Add support for late PCS provider attachment to a phylink instance.
> This works by creating a global notifier for the PCS provider and
> making each phylink instance that makes use of fwnode subscribe to
> this notifier.
> 
> The PCS notifier will emit the event FWNODE_PCS_PROVIDER_ADD every time
> a new PCS provider is added.
> 
> phylink will then react to this event and will call the new function
> fwnode_phylink_pcs_get_from_fwnode() that will check if the PCS fwnode
> provided by the event is present in the pcs-handle property of the
> phylink instance.
> 
> If a related PCS is found, then such PCS is added to the phylink
> instance PCS list.
> 
> Then we link the PCS to the phylink instance and we refresh the supported
> interfaces of the phylink instance.
> 
> Finally we check if we are in a major_config_failed scenario and trigger
> an interface reconfiguration in the next phylink resolve.
> 
> In the example scenario where the link was previously torn down due to
> removal of PCS, the link will be established again as the PCS came back
> and is now available to phylink.
> 
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---

[...]

> @@ -2151,6 +2204,10 @@ void phylink_destroy(struct phylink *pl)
>  	if (pl->link_gpio)
>  		gpiod_put(pl->link_gpio);
>  
> +	/* Unregister notifier for late PCS attach */
> +	if (pl->fwnode_pcs_nb.notifier_call)
> +		unregister_fwnode_pcs_notifier(&pl->fwnode_pcs_nb);

I wanted to try this out, but I get :

drivers/net/phy/phylink.c:2218:17: error: implicit declaration of function ‘unregister_fwnode_pcs_notifier’; did you mean ‘register_fwnode_pcs_notifier’? [-Werror=implicit-function-declaration]
 2218 |                 unregister_fwnode_pcs_notifier(&pl->fwnode_pcs_nb);
      |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                 register_fwnode_pcs_notifier

I guess you either need to stub this, or there's a missing Kconfig
dependency somewhere

Maxime





^ permalink raw reply

* [PATCH] media: meson: vdec: fix use-after-free of decode work in stop/close path
From: Doruk Tan Ozturk @ 2026-06-15 14:05 UTC (permalink / raw)
  To: neil.armstrong, mchehab, gregkh, khilman
  Cc: jbrunet, martin.blumenstingl, mjourdan, hverkuil, linux-media,
	linux-amlogic, linux-staging, linux-arm-kernel, linux-kernel,
	Doruk Tan Ozturk, stable

vdec_close() calls v4l2_m2m_ctx_release() and then kfree(sess) without
ever cancelling sess->esparser_queue_work. The worker
esparser_queue_all_src() takes sess->lock and walks the source buffers
of sess->m2m_ctx, so if it is still pending or running when the session
is torn down it dereferences freed memory.

The work is (re-)armed from several places, including amvdec_buf_done(),
which runs from the decode-completion/IRQ path. That makes the obvious
fixes insufficient:

  - v4l2_m2m_ctx_release() frees m2m_ctx (and runs stop_streaming via
    vb2_queue_release()), but never cancels the work. Cancelling in
    vdec_close() after v4l2_m2m_ctx_release() would wait on a worker that
    may already be dereferencing the now-freed m2m_ctx.

  - Cancelling in vdec_close() before v4l2_m2m_ctx_release() keeps
    m2m_ctx valid, but the hardware is still live, so amvdec_buf_done()
    can re-arm the work right after the cancel, reintroducing the UAF.

Cancel the work in vdec_stop_streaming() instead, right after
vdec_poweroff() has quiesced the hardware (so its IRQ can no longer
re-arm the work) and while sess->m2m_ctx is still valid. Because
v4l2_m2m_ctx_release() always tears the queues down through
vb2_queue_release() -> __vb2_queue_cancel() -> stop_streaming, this
single cancel covers both the STREAMOFF and the close paths.

This does not deadlock: the queue lock (sess->lock, shared by both vb2
queues) is taken by the worker, but neither the STREAMOFF path
(video_ioctl2 serialises on vdev->lock == core->lock, and
v4l2_m2m_streamoff() calls the lockless vb2_streamoff()) nor the close
path (vb2_queue_release()) holds sess->lock when stop_streaming runs, so
cancel_work_sync() can safely wait for the worker.

Fixes: 3e7f51bd9607 ("media: meson: add v4l2 m2m video decoder driver")
Cc: stable@vger.kernel.org
Signed-off-by: Doruk Tan Ozturk <doruk@0sec.ai>
---
 drivers/staging/media/meson/vdec/vdec.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c
index 4b77ec1af5a7..42822064cf8d 100644
--- a/drivers/staging/media/meson/vdec/vdec.c
+++ b/drivers/staging/media/meson/vdec/vdec.c
@@ -419,6 +419,16 @@ static void vdec_stop_streaming(struct vb2_queue *q)
 		sess->status = STATUS_STOPPED;
 	}

+	/*
+	 * The esparser_queue_work worker dereferences sess->m2m_ctx and
+	 * sess->lock. The hardware (and its IRQ, which re-arms the work via
+	 * amvdec_buf_done()) has been quiesced by vdec_poweroff() above, so
+	 * no new work can be scheduled past this point. m2m_ctx is still
+	 * valid here. Wait for any in-flight worker to finish before the
+	 * buffers and (on the close path) m2m_ctx are torn down.
+	 */
+	cancel_work_sync(&sess->esparser_queue_work);
+
 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
 		while ((buf = v4l2_m2m_src_buf_remove(sess->m2m_ctx)))
 			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
-- 
2.43.0

^ permalink raw reply related

* [PATCH] media: mediatek: vcodec: fix use-after-free in decoder release path
From: Doruk Tan Ozturk @ 2026-06-15 14:05 UTC (permalink / raw)
  To: tiffany.lin, andrew-ct.chen, yunfei.dong, mchehab, matthias.bgg,
	angelogioacchino.delregno
  Cc: hverkuil+cisco, linux-media, linux-mediatek, linux-arm-kernel,
	linux-kernel, Doruk Tan Ozturk, stable

fops_vcodec_release() frees the decoder context with kfree(ctx) but
never cancels the per-context decode_work worker first. Although
v4l2_m2m_ctx_release() waits for any in-flight m2m job to finish, the
workqueue handler (mtk_vdec_worker) may still be running and accessing
the context after v4l2_m2m_job_finish() returns. Once kfree(ctx) runs,
that worker dereferences freed memory, resulting in a use-after-free.

Cancel the pending decode work with cancel_work_sync(&ctx->decode_work)
after the controls and m2m context are torn down and before kfree(ctx),
mirroring the fix already applied to the encoder release path in
commit 76e35091ffc7 ("media: mediatek: vcodec: fix use-after-free in encoder release path").

decode_work is always initialised before release can run:
fops_vcodec_open() calls mtk_vcodec_dec_set_default_params() (its only
caller) unconditionally after a successful v4l2_m2m_ctx_init(), and that
function runs INIT_WORK(&ctx->decode_work, ...). A context can only reach
fops_vcodec_release() via an open() that returned 0, i.e. one that passed
that INIT_WORK. cancel_work_sync() on a properly initialised work_struct
is therefore always safe, even if the work was never queued. This is
unlike the 2023 msg_queue->core_work regression, where the work item
could be uninitialised at cancel time.

Fixes: 590577a4e525 ("[media] vcodec: mediatek: Add Mediatek V4L2 Video Decoder Driver")
Cc: stable@vger.kernel.org
Signed-off-by: Doruk Tan Ozturk <doruk@0sec.ai>
---
 .../mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c         | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
index e936ed8dffbaf..30906b24c608a 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
@@ -313,6 +313,15 @@ static int fops_vcodec_release(struct file *file)
 	v4l2_fh_exit(&ctx->fh);
 	v4l2_ctrl_handler_free(&ctx->ctrl_hdl);

+	/*
+	 * Cancel any pending decode work before freeing the context.
+	 * Although v4l2_m2m_ctx_release() waits for m2m job completion,
+	 * the workqueue handler (mtk_vdec_worker) may still be accessing
+	 * the context after v4l2_m2m_job_finish() returns. Without this,
+	 * a use-after-free occurs when the worker accesses ctx after kfree.
+	 */
+	cancel_work_sync(&ctx->decode_work);
+
 	mtk_vcodec_dbgfs_remove(dev, ctx->id);
 	spin_lock_irqsave(&dev->dev_ctx_lock, flags);
 	list_del_init(&ctx->list);
-- 
2.43.0

^ permalink raw reply related

* Re: [PATCH v1] MAINTAINERS: Add git tree for TI K3 ARCHITECTURE
From: Francesco Dolcini @ 2026-06-15 13:55 UTC (permalink / raw)
  To: Nishanth Menon
  Cc: Francesco Dolcini, Vignesh Raghavendra, Tero Kristo,
	linux-arm-kernel, linux-kernel, Francesco Dolcini
In-Reply-To: <20260615134608.adr62go2doeggkaj@margin>

On Mon, Jun 15, 2026 at 08:46:08AM -0500, Nishanth Menon wrote:
> On 15:26-20260615, Francesco Dolcini wrote:
> > From: Francesco Dolcini <francesco.dolcini@toradex.com>
> > 
> > Add git tree for TI K3 architecture.
> > 
> > Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
> 
> Thanks for adding this in..
> > ---
> >  MAINTAINERS | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 8629ed2aa82f..cbc56dc242b9 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3756,6 +3756,7 @@ M:	Vignesh Raghavendra <vigneshr@ti.com>
> >  M:	Tero Kristo <kristo@kernel.org>
> >  L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
> >  S:	Supported
> > +T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
> 
> Is it better to use https?
> https://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git ?

I followed what is used in 95% of the instances for git.kernel.org,
and we have git:// for the other 2 instances of the TI git tree.

If there is value in using https://, it should be done for the whole file.

My preference would be to merge this as it is, if you disagree I can
send a v2.

Francesco

								  


^ permalink raw reply

* [PATCH resend] drm: uapi: Add macro for chipset specific event ID region
From: Bence Csokas @ 2026-06-15 13:52 UTC (permalink / raw)
  To: dri-devel, linux-kernel, linux-arm-kernel, linux-samsung-soc
  Cc: Bence Csokas, Daniel Kiss, David Airlie, Simona Vetter,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, Inki Dae,
	Seung-Woo Kim, Kyungmin Park, Krzysztof Kozlowski, Alim Akhtar,
	Zack Rusin, Broadcom internal kernel review list

uapi/drm/drm.h states:

    Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
    up are chipset specific.

However, this distinction was not put in the code. To elevate the contract
between the generic DRM framework and the driver from the comment to code,
put this in a macro for clarity and convenience.

Cc: Daniel Kiss <Daniel.Kiss@arm.com>
Signed-off-by: Bence Csokas <bence.csokas@arm.com>
---
 include/uapi/drm/drm.h         | 8 ++++++++
 include/uapi/drm/exynos_drm.h  | 4 ++--
 include/uapi/drm/virtgpu_drm.h | 2 +-
 include/uapi/drm/vmwgfx_drm.h  | 2 +-
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 27cc159c1d27..aa745e643ef4 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1419,6 +1419,14 @@ struct drm_event {
  * The event payload is a struct drm_event_crtc_sequence.
  */
 #define DRM_EVENT_CRTC_SEQUENCE	0x03
+/**
+ * DRM_EVENT_VENDOR_SPECIFIC - vendor/chipset specific event
+ *
+ * These event IDs are reserved for chipset and driver specific events.
+ *
+ * Refer to the chipset driver's header for details and payload struct.
+ */
+#define DRM_EVENT_VENDOR_SPECIFIC(_v) ((_v) | 0x80000000)
 
 struct drm_event_vblank {
 	struct drm_event base;
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index a51aa1c618c1..8d3156fb129c 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -395,8 +395,8 @@ struct drm_exynos_ioctl_ipp_commit {
 		DRM_EXYNOS_IPP_COMMIT, struct drm_exynos_ioctl_ipp_commit)
 
 /* Exynos specific events */
-#define DRM_EXYNOS_G2D_EVENT		0x80000000
-#define DRM_EXYNOS_IPP_EVENT		0x80000002
+#define DRM_EXYNOS_G2D_EVENT		DRM_EVENT_VENDOR_SPECIFIC(0x0)
+#define DRM_EXYNOS_IPP_EVENT		DRM_EVENT_VENDOR_SPECIFIC(0x2)
 
 struct drm_exynos_g2d_event {
 	struct drm_event	base;
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 9debb320c34b..03e8a0c7f778 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -224,7 +224,7 @@ struct drm_virtgpu_context_init {
  * effect.  The event size is sizeof(drm_event), since there is no additional
  * payload.
  */
-#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000
+#define VIRTGPU_EVENT_FENCE_SIGNALED DRM_EVENT_VENDOR_SPECIFIC(0x10000000)
 
 #define DRM_IOCTL_VIRTGPU_MAP \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 7d786a0cc835..5e5878384e60 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -715,7 +715,7 @@ struct drm_vmw_fence_arg {
 /*
  * The event type
  */
-#define DRM_VMW_EVENT_FENCE_SIGNALED 0x80000000
+#define DRM_VMW_EVENT_FENCE_SIGNALED DRM_EVENT_VENDOR_SPECIFIC(0x0)
 
 struct drm_vmw_event_fence {
 	struct drm_event base;

base-commit: 0e0611827f3349d0a2ac121c023a6d3260dcecdb
-- 
2.54.0



^ permalink raw reply related

* Re: [PATCH v1] MAINTAINERS: Add git tree for TI K3 ARCHITECTURE
From: Nishanth Menon @ 2026-06-15 13:46 UTC (permalink / raw)
  To: Francesco Dolcini
  Cc: Vignesh Raghavendra, Tero Kristo, linux-arm-kernel, linux-kernel,
	Francesco Dolcini
In-Reply-To: <20260615132640.161584-1-francesco@dolcini.it>

On 15:26-20260615, Francesco Dolcini wrote:
> From: Francesco Dolcini <francesco.dolcini@toradex.com>
> 
> Add git tree for TI K3 architecture.
> 
> Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>

Thanks for adding this in..
> ---
>  MAINTAINERS | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 8629ed2aa82f..cbc56dc242b9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3756,6 +3756,7 @@ M:	Vignesh Raghavendra <vigneshr@ti.com>
>  M:	Tero Kristo <kristo@kernel.org>
>  L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
>  S:	Supported
> +T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git

Is it better to use https?
https://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git ?

>  F:	Documentation/devicetree/bindings/arm/ti/k3.yaml
>  F:	Documentation/devicetree/bindings/hwinfo/ti,k3-socinfo.yaml
>  F:	arch/arm64/boot/dts/ti/Makefile
> -- 
> 2.47.3
> 

-- 
Regards,
Nishanth Menon
Key (0xDDB5849D1736249D) / Fingerprint: F8A2 8693 54EB 8232 17A3  1A34 DDB5 849D 1736 249D
https://ti.com/opensource


^ permalink raw reply

* Re: [PATCH v1 06/11] KVM: arm64: Factor out reusable vCPU reset helpers
From: Fuad Tabba @ 2026-06-15 13:45 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <ai_7FupwNYrudRZZ@google.com>

On Mon, 15 Jun 2026 at 14:16, Vincent Donnefort <vdonnefort@google.com> wrote:
>
> On Fri, Jun 12, 2026 at 07:59:20AM +0100, tabba@google.com wrote:
> > Pull the reusable pieces out of kvm_reset_vcpu(): expose the reset
> > PSTATE values in kvm_arm.h, and split the core register reset and the
> > PSCI-driven reset into kvm_reset_vcpu_core() and kvm_reset_vcpu_psci().
> > A follow-up series reuses these to reset protected vCPUs at EL2.
> >
> > No functional change intended.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/include/asm/kvm_arm.h     | 12 ++++++
> >  arch/arm64/include/asm/kvm_emulate.h | 58 +++++++++++++++++++++++++++
> >  arch/arm64/kvm/reset.c               | 60 ++--------------------------
> >  3 files changed, 73 insertions(+), 57 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> > index 3f9233b5a130..aba4ec09acd2 100644
> > --- a/arch/arm64/include/asm/kvm_arm.h
> > +++ b/arch/arm64/include/asm/kvm_arm.h
> > @@ -348,4 +348,16 @@
> >       { PSR_AA32_MODE_UND,    "32-bit UND" }, \
> >       { PSR_AA32_MODE_SYS,    "32-bit SYS" }
> >
> > +/*
> > + * ARMv8 Reset Values
> > + */
> > +#define VCPU_RESET_PSTATE_EL1        (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
> > +                              PSR_F_BIT | PSR_D_BIT)
> > +
> > +#define VCPU_RESET_PSTATE_EL2        (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
> > +                              PSR_F_BIT | PSR_D_BIT)
> > +
> > +#define VCPU_RESET_PSTATE_SVC        (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
> > +                              PSR_AA32_I_BIT | PSR_AA32_F_BIT)
> > +
> >  #endif /* __ARM64_KVM_ARM_H__ */
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index aed9fc0b717b..8436e71c402d 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -704,4 +704,62 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
> >                       vcpu->arch.hcrx_el2 |= HCRX_EL2_EnASR;
> >       }
> >  }
> > +
> > +/* Reset a vcpu's core registers. */
> > +static inline void kvm_reset_vcpu_core(struct kvm_vcpu *vcpu)
> > +{
> > +     u32 pstate;
> > +
> > +     if (vcpu_el1_is_32bit(vcpu)) {
> > +             pstate = VCPU_RESET_PSTATE_SVC;
> > +     } else if (vcpu_has_nv(vcpu)) {
> > +             pstate = VCPU_RESET_PSTATE_EL2;
> > +     } else {
> > +             pstate = VCPU_RESET_PSTATE_EL1;
> > +     }
>
> nit: no braces here, actually there were none before.

Ack.
/fuad

>
> > +
> > +     /* Reset core registers */
> > +     memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
> > +     memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
> > +     vcpu->arch.ctxt.spsr_abt = 0;
> > +     vcpu->arch.ctxt.spsr_und = 0;
> > +     vcpu->arch.ctxt.spsr_irq = 0;
> > +     vcpu->arch.ctxt.spsr_fiq = 0;
> > +     vcpu_gp_regs(vcpu)->pstate = pstate;
> > +}
> > +
> > +/* PSCI reset handling for a vcpu. */
> > +static inline void kvm_reset_vcpu_psci(struct kvm_vcpu *vcpu,
> > +                                    struct vcpu_reset_state *reset_state)
> > +{
> > +     unsigned long target_pc = reset_state->pc;
> > +
> > +     /* Gracefully handle Thumb2 entry point */
> > +     if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
> > +             target_pc &= ~1UL;
> > +             vcpu_set_thumb(vcpu);
> > +     }
> > +
> > +     /* Propagate caller endianness */
> > +     if (reset_state->be)
> > +             kvm_vcpu_set_be(vcpu);
> > +
> > +     *vcpu_pc(vcpu) = target_pc;
> > +
> > +     /*
> > +      * We may come from a state where either a PC update was
> > +      * pending (SMC call resulting in PC being increpented to
> > +      * skip the SMC) or a pending exception. Make sure we get
> > +      * rid of all that, as this cannot be valid out of reset.
> > +      *
> > +      * Note that clearing the exception mask also clears PC
> > +      * updates, but that's an implementation detail, and we
> > +      * really want to make it explicit.
> > +      */
> > +     vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
> > +     vcpu_clear_flag(vcpu, EXCEPT_MASK);
> > +     vcpu_clear_flag(vcpu, INCREMENT_PC);
> > +     vcpu_set_reg(vcpu, 0, reset_state->r0);
> > +}
> > +
> >  #endif /* __ARM64_KVM_EMULATE_H__ */
> > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> > index 60969d90bdd3..e22d0be9e57c 100644
> > --- a/arch/arm64/kvm/reset.c
> > +++ b/arch/arm64/kvm/reset.c
> > @@ -34,18 +34,6 @@
> >  static u32 __ro_after_init kvm_ipa_limit;
> >  unsigned int __ro_after_init kvm_host_sve_max_vl;
> >
> > -/*
> > - * ARMv8 Reset Values
> > - */
> > -#define VCPU_RESET_PSTATE_EL1        (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
> > -                              PSR_F_BIT | PSR_D_BIT)
> > -
> > -#define VCPU_RESET_PSTATE_EL2        (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
> > -                              PSR_F_BIT | PSR_D_BIT)
> > -
> > -#define VCPU_RESET_PSTATE_SVC        (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
> > -                              PSR_AA32_I_BIT | PSR_AA32_F_BIT)
> > -
> >  unsigned int __ro_after_init kvm_sve_max_vl;
> >
> >  int __init kvm_arm_init_sve(void)
> > @@ -191,7 +179,6 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >  {
> >       struct vcpu_reset_state reset_state;
> >       bool loaded;
> > -     u32 pstate;
> >
> >       scoped_guard(spinlock, &vcpu->arch.mp_state_lock) {
> >               reset_state = vcpu->arch.reset_state;
> > @@ -210,21 +197,8 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >               kvm_vcpu_reset_sve(vcpu);
> >       }
> >
> > -     if (vcpu_el1_is_32bit(vcpu))
> > -             pstate = VCPU_RESET_PSTATE_SVC;
> > -     else if (vcpu_has_nv(vcpu))
> > -             pstate = VCPU_RESET_PSTATE_EL2;
> > -     else
> > -             pstate = VCPU_RESET_PSTATE_EL1;
> > -
> >       /* Reset core registers */
> > -     memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
> > -     memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
> > -     vcpu->arch.ctxt.spsr_abt = 0;
> > -     vcpu->arch.ctxt.spsr_und = 0;
> > -     vcpu->arch.ctxt.spsr_irq = 0;
> > -     vcpu->arch.ctxt.spsr_fiq = 0;
> > -     vcpu_gp_regs(vcpu)->pstate = pstate;
> > +     kvm_reset_vcpu_core(vcpu);
> >
> >       /* Reset system registers */
> >       kvm_reset_sys_regs(vcpu);
> > @@ -233,36 +207,8 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >        * Additional reset state handling that PSCI may have imposed on us.
> >        * Must be done after all the sys_reg reset.
> >        */
> > -     if (reset_state.reset) {
> > -             unsigned long target_pc = reset_state.pc;
> > -
> > -             /* Gracefully handle Thumb2 entry point */
> > -             if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
> > -                     target_pc &= ~1UL;
> > -                     vcpu_set_thumb(vcpu);
> > -             }
> > -
> > -             /* Propagate caller endianness */
> > -             if (reset_state.be)
> > -                     kvm_vcpu_set_be(vcpu);
> > -
> > -             *vcpu_pc(vcpu) = target_pc;
> > -
> > -             /*
> > -              * We may come from a state where either a PC update was
> > -              * pending (SMC call resulting in PC being increpented to
> > -              * skip the SMC) or a pending exception. Make sure we get
> > -              * rid of all that, as this cannot be valid out of reset.
> > -              *
> > -              * Note that clearing the exception mask also clears PC
> > -              * updates, but that's an implementation detail, and we
> > -              * really want to make it explicit.
> > -              */
> > -             vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
> > -             vcpu_clear_flag(vcpu, EXCEPT_MASK);
> > -             vcpu_clear_flag(vcpu, INCREMENT_PC);
> > -             vcpu_set_reg(vcpu, 0, reset_state.r0);
> > -     }
> > +     if (reset_state.reset)
> > +             kvm_reset_vcpu_psci(vcpu, &reset_state);
> >
> >       /* Reset timer */
> >       kvm_timer_vcpu_reset(vcpu);
> > --
> > 2.54.0.1136.gdb2ca164c4-goog
> >


^ permalink raw reply

* Re: [PATCH v1 4/4] iommu/arm-smmu-v3: Process vIOMMU invalidations in batches
From: Jason Gunthorpe @ 2026-06-15 13:43 UTC (permalink / raw)
  To: Nicolin Chen
  Cc: Will Deacon, Kevin Tian, Robin Murphy, Joerg Roedel, Shuah Khan,
	Pranjal Shrivastava, Kees Cook, Yi Liu, Eric Auger,
	linux-arm-kernel, iommu, linux-kernel, linux-kselftest
In-Reply-To: <aixZzvVWmAFEDQZI@nvidia.com>

On Fri, Jun 12, 2026 at 12:11:10PM -0700, Nicolin Chen wrote:

> VMM would have to know which command failed, to flag it in the CONS
> register, indicating: a) commands prior to the CONS are issued, and
> b) command pointed by the CONS is illegal.

It is a VMM bug to send a malformed command into the kernel in the
first place.

Jason


^ permalink raw reply

* Re: [PATCH v3 1/3] ufs: core: Add get_hba_nortt callback for vendor-specific RTT capability
From: Bart Van Assche @ 2026-06-15 13:41 UTC (permalink / raw)
  To: ed.tsai, alim.akhtar, avri.altman, James.Bottomley,
	martin.petersen, linux-scsi, Matthias Brugger,
	AngeloGioacchino Del Regno
  Cc: linux-kernel, linux-arm-kernel, linux-mediatek, wsd_upstream,
	peter.wang, alice.chao, naomi.chu, chun-hung.wu
In-Reply-To: <20260615055802.105479-2-ed.tsai@mediatek.com>

On 6/14/26 10:57 PM, ed.tsai@mediatek.com wrote:
> The number of outstanding RTTs read from host controller capability
> register is problematic on some platforms. Add a new vendor callback
> get_hba_nortt() to allow platform vendors to override the default RTT
> capability value with platform-specific handling.
> 
> This patch keeps the max_num_rtt field for bisectability; the field will be
> removed in a later patch once all platforms are migrated.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>


^ permalink raw reply

* Re: [PATCH v3 3/3] ufs: core: Remove max_num_rtt field from ufs_hba_variant_ops
From: Bart Van Assche @ 2026-06-15 13:40 UTC (permalink / raw)
  To: ed.tsai, alim.akhtar, avri.altman, James.Bottomley,
	martin.petersen, linux-scsi, Matthias Brugger,
	AngeloGioacchino Del Regno
  Cc: linux-kernel, linux-arm-kernel, linux-mediatek, wsd_upstream,
	peter.wang, alice.chao, naomi.chu, chun-hung.wu
In-Reply-To: <20260615055802.105479-4-ed.tsai@mediatek.com>

On 6/14/26 10:57 PM, ed.tsai@mediatek.com wrote:
> Remove the max_num_rtt field from ufs_hba_variant_ops as it has been
> replaced by the get_hba_nortt() callback which provides more flexible
> platform-specific RTT capability handling.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>


^ permalink raw reply

* Re: [PATCH 1/3] dt-bindings: phy: nuvoton,ma35d1-usb2-phy: extend for dual-port OTG support
From: Rob Herring (Arm) @ 2026-06-15 13:40 UTC (permalink / raw)
  To: Joey Lu
  Cc: Hui-Ping Chen, Neil Armstrong, Conor Dooley, Vinod Koul,
	devicetree, Catalin Marinas, linux-arm-kernel,
	Krzysztof Kozlowski, linux-kernel, Joey Lu, Jacky Huang,
	Arnd Bergmann, linux-phy, Shan-Chun Hung
In-Reply-To: <20260615054911.48821-2-a0987203069@gmail.com>


On Mon, 15 Jun 2026 13:49:09 +0800, Joey Lu wrote:
> The MA35D1 has two USB PHY ports managed by the same hardware block:
> 
>   - PHY0 (index 0): OTG port shared between the DWC2 gadget controller
>     and EHCI0/OHCI0 host controllers.  A hardware mux follows the USB
>     ID pin automatically.
> 
>   - PHY1 (index 1): dedicated host-only port for EHCI1/OHCI1.
> 
> Extend the existing binding to cover both ports:
> 
>   - The PHY node is now a child of the system-management syscon node
>     with a reg property.  The nuvoton,sys phandle and clocks
>     properties are removed; the driver derives the regmap from its
>     parent, and clock gating is owned by each individual USB controller.
> 
>   - #phy-cells changes from 0 to 1: the cell selects the PHY port.
> 
>   - Two optional board-tuning properties are added: nuvoton,rcalcode
>     for per-port resistor trim and nuvoton,oc-active-high for
>     over-current polarity.
> 
> Signed-off-by: Joey Lu <a0987203069@gmail.com>
> ---
>  .../bindings/phy/nuvoton,ma35d1-usb2-phy.yaml | 62 ++++++++++++++-----
>  1 file changed, 48 insertions(+), 14 deletions(-)
> 

My bot found errors running 'make dt_binding_check' on your patch:

yamllint warnings/errors:

dtschema/dtc warnings/errors:
/builds/robherring/dt-review-ci/linux/Documentation/devicetree/bindings/phy/nuvoton,ma35d1-usb2-phy.example.dtb: system-management@40460000 (nuvoton,ma35d1-reset): '#address-cells', '#size-cells', 'usb-phy@60' do not match any of the regexes: '^pinctrl-[0-9]+$'
	from schema $id: http://devicetree.org/schemas/reset/nuvoton,ma35d1-reset.yaml
/builds/robherring/dt-review-ci/linux/Documentation/devicetree/bindings/phy/nuvoton,ma35d1-usb2-phy.example.dtb: system-management@40460000 (nuvoton,ma35d1-reset): compatible: ['nuvoton,ma35d1-reset', 'syscon', 'simple-mfd'] is too long
	from schema $id: http://devicetree.org/schemas/reset/nuvoton,ma35d1-reset.yaml
/builds/robherring/dt-review-ci/linux/Documentation/devicetree/bindings/phy/nuvoton,ma35d1-usb2-phy.example.dtb: system-management@40460000 (nuvoton,ma35d1-reset): reg: [[0, 1078329344], [0, 512]] is too long
	from schema $id: http://devicetree.org/schemas/reset/nuvoton,ma35d1-reset.yaml

doc reference errors (make refcheckdocs):

See https://patchwork.kernel.org/project/devicetree/patch/20260615054911.48821-2-a0987203069@gmail.com

The base for the series is generally the latest rc1. A different dependency
should be noted in *this* patch.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:

pip3 install dtschema --upgrade

Please check and re-submit after running the above command yourself. Note
that DT_SCHEMA_FILES can be set to your schema file to speed up checking
your schema. However, it must be unset to test all examples with your schema.



^ permalink raw reply

* Re: [PATCH net-next v7 01/12] net: phylink: keep and use MAC supported_interfaces in phylink struct
From: Maxime Chevallier @ 2026-06-15 13:33 UTC (permalink / raw)
  To: Christian Marangi, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Simon Horman, Jonathan Corbet, Shuah Khan,
	Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
	Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
	linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260615122950.22281-2-ansuelsmth@gmail.com>

Hello Christian,

On 6/15/26 14:29, Christian Marangi wrote:
> Add in phylink struct a copy of supported_interfaces from phylink_config
> and make use of that instead of relying on phylink_config value.
> 
> This is in preparation for support of PCS handling internally to phylink,
> where a PCS can be removed or added after the phylink is created and we
> need both a reference of the supported_interfaces value from
> phylink_config and an internal value that can be updated with the new
> PCS info.
> 
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---
>  drivers/net/phy/phylink.c | 22 +++++++++++++++-------
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
> index 087ac63f9193..4d59c0dd78db 100644
> --- a/drivers/net/phy/phylink.c
> +++ b/drivers/net/phy/phylink.c
> @@ -60,6 +60,11 @@ struct phylink {
>  	/* The link configuration settings */
>  	struct phylink_link_state link_config;
>  
> +	/* What interface are supported by the current link.
> +	 * Can change on removal or addition of new PCS.
> +	 */
> +	DECLARE_PHY_INTERFACE_MASK(supported_interfaces);

Can you clarify a bit what you mean here ? Is that the combination of the
interfaces the MAC supports AND the currently in-use PCS ?

Maxime



^ permalink raw reply

* Re: [PATCH v11 0/3] Add eDP support for RK3576
From: Damon Ding @ 2026-06-15 13:32 UTC (permalink / raw)
  To: Heiko Stübner, robh, krzk+dt, conor+dt
  Cc: sebastian.reichel, nicolas.frattaroli, alchark, detlev.casanova,
	cristian.ciocaltea, michael.riesch, andy.yan, devicetree,
	linux-arm-kernel, linux-rockchip, linux-kernel
In-Reply-To: <3213683.CbtlEUcBR6@diego>

On 6/15/2026 9:01 PM, Heiko Stübner wrote:
> Hi Damon,
> 
> Am Montag, 15. Juni 2026, 14:33:03 Mitteleuropäische Sommerzeit schrieb Damon Ding:
>> Gentle ping on this patch series.
> 
> Linux 7.1 was released yesterday, so we're in the merge-window now.
> (And the 5th of June was shortly before -rc7, so too late for 7.2)
> 
> So I'll pick those up after the merge window ends in 2 weeks.
> 
> 

Thanks for the heads-up. Got it. ;-)

Best regards,
Damon

>>
>> On 6/5/2026 10:23 AM, Damon Ding wrote:
>>> Picked from:
>>> https://lore.kernel.org/all/20260601065100.1103873-1-damon.ding@rock-chips.com/
>>>
>>> Patch 1-2 are to add missing clock "hclk" for RK3588 eDP nodes.
>>> Patch 3 is to add the RK3576 eDP node.
>>>
>>> Damon Ding (3):
>>>     arm64: dts: rockchip: Add missing hclk for RK3588 eDP0
>>>     arm64: dts: rockchip: Add missing hclk for RK3588 eDP1
>>>     arm64: dts: rockchip: Add eDP node for RK3576
>>>
>>>    arch/arm64/boot/dts/rockchip/rk3576.dtsi      | 28 +++++++++++++++++++
>>>    arch/arm64/boot/dts/rockchip/rk3588-base.dtsi |  4 +--
>>>    .../arm64/boot/dts/rockchip/rk3588-extra.dtsi |  4 +--
>>>    3 files changed, 32 insertions(+), 4 deletions(-)
>>>
>>> ---
>>>
>>> Changes in v2:
>>> - Split out separate patches to add the "hclk" clock reference.
>>> - Split out separate patches to enable the "hclk" clock.
>>> - Add Reviewed-by tag.
>>>
>>> Changes in v3:
>>> - Add a patch to expand descriptions for clocks of the eDP node.
>>> - Add Reviewed-by tag.
>>>
>>> Changes in v4:
>>> - Modify commit msg.
>>>
>>> Changes in v5:
>>> - Enforce the correct third clock name on a per-compatible basis.
>>> - Modify the commit msg simultaneously.
>>> - Add Acked-by tag.
>>>
>>> Changes in v6:
>>> - Expand more detail commit msg about using hclk instead of grf clock.
>>>
>>> Changes in v7:
>>> - List all valid clock names at the top level, and constrain the clock
>>>     count for each platform with minItems/maxItems in allOf.
>>>
>>> Changes in v8:
>>> - Fix indentation to 10 for enum in clock-names property.
>>>
>>> Changes in v9:
>>> - Restore the explicit clock-names for RK3399 and RK3588 eDP dt-bindings.
>>>
>>> Changes in v10:
>>> - Use automatic cleanup to fix OF node reference leak reported by
>>>     Sashiko.
>>>
>>> Changes in v11:
>>> - Pick and rebase DT related patches.
>>>
>>
>>
> 
> 
> 
> 
> 
> 



^ permalink raw reply

* Re: [PATCH net-next v7 02/12] net: phylink: introduce internal phylink PCS handling
From: Maxime Chevallier @ 2026-06-15 13:31 UTC (permalink / raw)
  To: Christian Marangi, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Simon Horman, Jonathan Corbet, Shuah Khan,
	Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
	Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
	linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260615122950.22281-3-ansuelsmth@gmail.com>

Hi Christian,

On 6/15/26 14:29, Christian Marangi wrote:
> Introduce internal handling of PCS for phylink. This is an alternative
> way to .mac_select_pcs that moves the selection logic of the PCS entirely
> to phylink with the usage of the supported_interface value in the PCS
> struct.
> 
> MAC should now provide a callback to fill the available PCS in
> phylink_config in .fill_available_pcs and fill the .num_possible_pcs with
> the number of elements in the array. MAC should also define a new bitmap,
> pcs_interfaces, in phylink_config to define for what interface mode a
> dedicated PCS is required.
> 
> On phylink_create(), an array of PCS pointers of size
> .num_possible_pcs from phylink_config is allocated, and .fill_available_pcs
> from phylink_config is called, passing as args the just-allocated array and
> the number of possible elements in it.
> 
> MAC will fill this passed array with all the available PCS.
> 
> This array is then parsed and a linked list of PCS is created based on
> the allocated PCS array filled by MAC via .fill_available_pcs().
> 
> Every PCS in phylink PCS list gets then linked to the phylink instance
> by setting the phylink value in phylink_pcs struct to the phylink instance.
> Also the supported_interface value in phylink struct is updated with
> the new supported_interface from the provided PCS.
> 
> On phylink_destroy(), every PCS in phylink PCS list is unlinked from the
> phylink instance by setting the phylink value in phylink_pcs struct to NULL
> and removed from the PCS list.
> 
> phylink_validate_mac_and_pcs(), phylink_major_config() and
> phylink_inband_caps() are updated to support this new implementation
> with the PCS list stored in phylink.
> 
> They will make use of phylink_validate_pcs_interface() that will loop
> for every PCS in the phylink PCS available list and find one that supports
> the passed interface.
> 
> phylink_validate_pcs_interface() applies the same logic of .mac_select_pcs
> where if a supported_interface value is not set for the PCS struct, then
> it's assumed every interface is supported.
> 
> A MAC is required to implement either a .mac_select_pcs or make use of
> the PCS list implementation. Implementing both will result in a failure
> in phylink_create().
> 
> A MAC defining .num_possible_pcs in phylink_config MUST also define a
> .fill_available_pcs or phylink_create() will fail with a negative error.
> 
> phylink value in phylink_pcs struct with this implementation is used to
> track from PCS side when it's attached to a phylink instance. PCS driver
> will make use of this information to correctly detach from a phylink
> instance if needed.
> 
> phylink_pcs_change() is also changed to verify that the PCS that triggered
> a link change is the one that is currently used by the phylink instance.
> 
> The .mac_select_pcs implementation is not changed but it's expected that
> every MAC driver migrates to the new implementation to later deprecate
> and remove .mac_select_pcs.
> 
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---

[...]

> @@ -1872,10 +1993,28 @@ struct phylink *phylink_create(struct phylink_config *config,
>  	mutex_init(&pl->phydev_mutex);
>  	mutex_init(&pl->state_mutex);
>  	INIT_WORK(&pl->resolve, phylink_resolve);
> +	INIT_LIST_HEAD(&pl->pcs_list);
> +
> +	/* Fill the PCS list with available PCS from phylink config */
> +	ret = phylink_fill_available_pcs(pl, config);
> +	if (ret < 0) {
> +		kfree(pl);
> +		return ERR_PTR(ret);
> +	}
> +
> +	/* Link available PCS to phylink */
> +	list_for_each_entry(pcs, &pl->pcs_list, list)
> +		pcs->phylink = pl;
>  
>  	phy_interface_copy(pl->supported_interfaces,
>  			   config->supported_interfaces);
>  
> +	/* Update supported interfaces */
> +	list_for_each_entry(pcs, &pl->pcs_list, list)
> +		phy_interface_or(pl->supported_interfaces,
> +				 pl->supported_interfaces,
> +				 pcs->supported_interfaces);
> +

I'm not entirely sure about that, we may need to restrict the supported_interfaces
from the MAC.

As an example, take mvpp2. We have 2 PCSs, one for BaseX/SGMII, one for BaseR. But
if we don't have a comphy (generic PHY) device, then we can't use all the
combinations of modes our PCSs can provide:

https://elixir.bootlin.com/linux/v7.1-rc7/source/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c#L7074

These aren't external PCS IPs, but from what I understand you'd like to
handle these the same way as purely external PCSs, right ?

I'd say the MAC driver ultimately has the knowledge of all possible interfaces.

The way I see it, it's probably safer to let the MAC give a wide range of interfaces,
and filter that down with what the PCSs can provide (i.e. turn that OR into an AND,
while handling the case where the PCS's supported_interfaces mask is empty).

What do you think ?

Maxime


^ permalink raw reply

* Re: [PATCH v1 05/11] KVM: arm64: Make vcpu_{read,write}_sys_reg available to HYP code
From: Fuad Tabba @ 2026-06-15 13:29 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <ai_5_CoRm-LdoVMm@google.com>

On Mon, 15 Jun 2026 at 14:11, Vincent Donnefort <vdonnefort@google.com> wrote:
>
> On Fri, Jun 12, 2026 at 07:59:19AM +0100, tabba@google.com wrote:
> > The vcpu_{read,write}_sys_reg() accessors are host-only, so helpers
> > built on them such as kvm_vcpu_set_be()/kvm_vcpu_is_be() cannot be
> > shared with hyp code. Add _vcpu_read_sys_reg()/_vcpu_write_sys_reg()
> > inlines in kvm_emulate.h that dispatch on is_nvhe_hyp_code() to the
> > host- or hyp-side accessor. A follow-up series uses this to share that
> > emulation code at EL2.
> >
> > No functional change intended.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/include/asm/kvm_emulate.h | 22 +++++++++++++++++++---
> >  1 file changed, 19 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index 5bf3d7e1d92c..aed9fc0b717b 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -506,6 +506,22 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >       return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> >  }
> >
> > +static inline u64 _vcpu_read_sys_reg(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)
>
> I am not sure a single underscore is widely used in the kernel.
>
> Why not move __vcpu_read_sys_reg() and __vcpu_write_sys_reg() from exception.c
> to kvm_emulate.h? especially this already checks has_vhe().

Good point. has_vhe() already returns a compile-time constant
in hyp code and a runtime cap check at EL1, so it works in all
contexts. I'll move them to kvm_emulate.h and rename to
kvm_vcpu_read_sys_reg() / kvm_vcpu_write_sys_reg().

Cheers,
/fuad

>
> > +{
> > +     if (!is_nvhe_hyp_code())
> > +             return vcpu_read_sys_reg(vcpu, reg);
> > +
> > +     return __vcpu_sys_reg(vcpu, reg);
> > +}
> > +
> > +static inline void _vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg)
> > +{
> > +     if (!is_nvhe_hyp_code())
> > +             vcpu_write_sys_reg(vcpu, val, reg);
> > +     else
> > +             __vcpu_assign_sys_reg(vcpu, reg, val);
> > +}
> > +
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> >       if (vcpu_mode_is_32bit(vcpu)) {
> > @@ -516,9 +532,9 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >
> >               r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
> >
> > -             sctlr = vcpu_read_sys_reg(vcpu, r);
> > +             sctlr = _vcpu_read_sys_reg(vcpu, r);
> >               sctlr |= SCTLR_ELx_EE;
> > -             vcpu_write_sys_reg(vcpu, sctlr, r);
> > +             _vcpu_write_sys_reg(vcpu, sctlr, r);
> >       }
> >  }
> >
> > @@ -533,7 +549,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
> >       r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
> >       bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
> >
> > -     return vcpu_read_sys_reg(vcpu, r) & bit;
> > +     return _vcpu_read_sys_reg(vcpu, r) & bit;
> >  }
> >
> >  static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
> > --
> > 2.54.0.1136.gdb2ca164c4-goog
> >


^ permalink raw reply

* [PATCH v1] MAINTAINERS: Add git tree for TI K3 ARCHITECTURE
From: Francesco Dolcini @ 2026-06-15 13:26 UTC (permalink / raw)
  To: Nishanth Menon, Vignesh Raghavendra, Tero Kristo,
	linux-arm-kernel, linux-kernel
  Cc: Francesco Dolcini

From: Francesco Dolcini <francesco.dolcini@toradex.com>

Add git tree for TI K3 architecture.

Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8629ed2aa82f..cbc56dc242b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3756,6 +3756,7 @@ M:	Vignesh Raghavendra <vigneshr@ti.com>
 M:	Tero Kristo <kristo@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:	Documentation/devicetree/bindings/arm/ti/k3.yaml
 F:	Documentation/devicetree/bindings/hwinfo/ti,k3-socinfo.yaml
 F:	arch/arm64/boot/dts/ti/Makefile
-- 
2.47.3



^ permalink raw reply related

* Re: [PATCH v2 2/6] iommu/arm-smmu: Add interconnect bandwidth voting support
From: Bibek Kumar Patro @ 2026-06-15 13:25 UTC (permalink / raw)
  To: Dmitry Baryshkov
  Cc: Will Deacon, Robin Murphy, Joerg Roedel, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
	linux-arm-kernel, iommu, devicetree, linux-kernel, linux-arm-msm
In-Reply-To: <7xfxlxfqjcqdzl6gckaoyy2ioefglc7bgi66yv5khrbl6fi2zc@ivtiukdaj4jv>



On 6/8/2026 7:25 PM, Dmitry Baryshkov wrote:
> On Tue, May 26, 2026 at 08:12:03PM +0530, Bibek Kumar Patro wrote:
>> On some SoCs the SMMU registers require an active interconnect
>> bandwidth vote to be accessible. While other clients typically
>> satisfy this requirement implicitly, certain corner cases (e.g.
>> during sleep/wakeup transitions) can leave the SMMU without a
>> vote, causing intermittent register access failures.
>>
>> Add support for an optional interconnect path to the arm-smmu
>> driver and vote for bandwidth while the SMMU is active. The path
>> is acquired from DT if present and ignored otherwise.
>>
>> The bandwidth vote is enabled before accessing SMMU registers
>> during probe and runtime resume, and released during runtime
>> suspend and on error paths.
>>
>> Generally, from an architectural perspective, GEM_NOC and DDR are
>> expected to have an active vote whenever the adreno_smmu block is
>> powered on. In most common use cases, this requirement is implicitly
>> satisfied because other GPU-related clients (for example, the GMU
>> device) already hold a GEM_NOC vote when adreno_smmu is enabled.
>>
>> However, there are certain corner cases, such as during sleep/wakeup
>> transitions, where the GEM_NOC vote can be removed before adreno_smmu
>> is powered down. If adreno_smmu is then accessed while the interconnect
>> vote is missing, it can lead to the observed failures. Because of the
>> precise ordering involved, this scenario is difficult to reproduce
>> consistently.
>> (Also, the GDSC involved in Adreno use cases can have an independent vote.)
>>
>> Signed-off-by: Bibek Kumar Patro <bibek.patro@oss.qualcomm.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c | 57 +++++++++++++++++++++++++++++++++--
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h |  2 ++
>>   2 files changed, 57 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 0bd21d206eb3e75c3b9fb1364cdc92e82c5aa499..07c7e44ec6a5bd1488f00f87d859a20495e46601 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -53,6 +53,11 @@
>>   #define MSI_IOVA_BASE			0x8000000
>>   #define MSI_IOVA_LENGTH			0x100000
>>   
>> +/* Interconnect bandwidth vote values for the SMMU register access path */
>> +#define ARM_SMMU_ICC_AVG_BW		0
>> +#define ARM_SMMU_ICC_PEAK_BW_HIGH	1000
> 
> totally random numbers, which might be different for non-Qualcomm platform.
> 
>> +#define ARM_SMMU_ICC_PEAK_BW_LOW	0
>> +
>>   static int force_stage;
>>   module_param(force_stage, int, S_IRUGO);
>>   MODULE_PARM_DESC(force_stage,
>> @@ -86,6 +91,36 @@ static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
>>   	}
>>   }
>>   
>> +static int arm_smmu_icc_get(struct arm_smmu_device *smmu)
>> +{
>> +	smmu->icc_path = devm_of_icc_get(smmu->dev, NULL);
> 
> Is there always only one bus / path in question?
> 

<Apologies, missed to respond to this query>
Yes. The TCU only needs a vote on the GEM_NOC interconnect
while accessing DDR in the downstream path (client->TCU->DDR), which is
what we are addressing here.
Hence there is only one icc path in question here.

Thanks & regards,
Bibek

>> +	if (IS_ERR(smmu->icc_path)) {
> 
> if (!IS_ERR(smmu->icc_path))
> 	return 0;
> 
> int err = PTR_ERR();
> if (err == -ENODEV) {
> 	icc_path = NULL;
> 	return 0;
> }
> 
> return dev_err_probe();
> 
> 
>> +		int err = PTR_ERR(smmu->icc_path);
>> +
>> +		if (err == -ENODEV) {
>> +			smmu->icc_path = NULL;
>> +			return 0;
>> +		}
>> +		return dev_err_probe(smmu->dev, err,
>> +				     "failed to get interconnect path\n");
>> +	}
>> +	return 0;
>> +}
>> +
>> +static void arm_smmu_icc_enable(struct arm_smmu_device *smmu)
>> +{
>> +	if (smmu->icc_path)
> 
> Drop the if.
> 
>> +		WARN_ON(icc_set_bw(smmu->icc_path, ARM_SMMU_ICC_AVG_BW,
>> +				   ARM_SMMU_ICC_PEAK_BW_HIGH));
> 
> WARN_ON_ONCE()?
> 
> Pass the error to the caller.
> 
> 
>> +}
>> +
>> +static void arm_smmu_icc_disable(struct arm_smmu_device *smmu)
>> +{
>> +	if (smmu->icc_path)
> 
> Drop the if.
> 
>> +		WARN_ON(icc_set_bw(smmu->icc_path, ARM_SMMU_ICC_AVG_BW,
>> +				   ARM_SMMU_ICC_PEAK_BW_LOW));
> 
> Pass the error to the caller.
> 
>> +}
>> +
>>   static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
>>   {
>>   	/*
>> @@ -2189,6 +2224,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>>   	if (err)
>>   		return err;
>>   
>> +	/*
>> +	 * Acquire and vote the interconnect path before accessing any SMMU
>> +	 * registers (including ARM_SMMU_GR0_ID0 in arm_smmu_device_cfg_probe).
>> +	 */
>> +	err = arm_smmu_icc_get(smmu);
>> +	if (err) {
>> +		clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks);
>> +		return err;
>> +	}
>> +	arm_smmu_icc_enable(smmu);
> 
> Handle the error.
> 
>> +
>>   	err = arm_smmu_device_cfg_probe(smmu);
>>   	if (err)
>>   		return err;
>> @@ -2273,8 +2319,10 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev)
>>   
>>   	if (pm_runtime_enabled(smmu->dev))
>>   		pm_runtime_force_suspend(smmu->dev);
>> -	else
>> +	else {
>>   		clk_bulk_disable(smmu->num_clks, smmu->clks);
>> +		arm_smmu_icc_disable(smmu);
> 
> Handle the error.
> 
> etc.
> 
>> +	}
>>   
>>   	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
>>   }
> 



^ permalink raw reply

* [PATCH net-next v7 06/12] net: Document PCS subsystem
From: Christian Marangi @ 2026-06-15 12:29 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Simon Horman, Jonathan Corbet, Shuah Khan, Christian Marangi,
	Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
	Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
	linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260615122950.22281-1-ansuelsmth@gmail.com>

Add extensive documentation of the new PCS subsystem and the fwnode
implementation with producer/consumer API.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/pcs.rst   | 229 +++++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 Documentation/networking/pcs.rst

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 44a422ad3b05..3fce8f6ac089 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -28,6 +28,7 @@ Contents:
    net_failover
    page_pool
    phy
+   pcs
    sfp-phylink
    alias
    bridge
diff --git a/Documentation/networking/pcs.rst b/Documentation/networking/pcs.rst
new file mode 100644
index 000000000000..98592cdee3ef
--- /dev/null
+++ b/Documentation/networking/pcs.rst
@@ -0,0 +1,229 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+PCS Subsystem
+=============
+
+The PCS (Physical Coding Sublayer) subsystem handles the registration and lookup
+of PCS devices. These devices contain the upper sublayers of the Ethernet
+physical layer, generally handling framing, scrambling, and encoding tasks. PCS
+devices may also include PMA (Physical Medium Attachment) components. PCS
+devices transfer data between the Link-layer MAC device, and the rest of the
+physical layer, typically via a serdes. The output of the serdes may be
+connected more-or-less directly to the medium when using fiber-optic or
+backplane connections (1000BASE-SX, 1000BASE-KX, etc). It may also communicate
+with a separate PHY (such as over SGMII) which handles the connection to the
+medium (such as 1000BASE-T).
+
+Remark on usage of .mac_select_pcs and fw_node PCS
+--------------------------------------------------
+
+There are generally two ways to look up a PCS device.
+
+1. MAC OP struct .mac_select_pcs (considered legacy)
+2. firmware node (fwnode) PCS entirely handled by phylink
+
+Implementation 1 leaves the entire handling of the PCS to the MAC
+driver with the selection of the PCS driven by .mac_select_pcs.
+Custom implementations are required if the PCS is external to the MAC
+and needs to be handled by a separate driver.
+
+This implementation is considered legacy and it's suggested to
+switch to the new fwnode PCS.
+
+Looking up PCS Devices (fwnode implementation)
+-----------------------------------------------
+
+The lookup of a PCS device follows the common producer/consumer implementation
+used by similar subsystems with a ``#pcs-cells`` on the producer and a
+``pcs-handle`` property on the consumer::
+
+    pcs: pcs {
+        // ...
+        #pcs-cells = <0>;
+    };
+
+    ethernet-controller {
+        // ...
+        pcs-handle = <&pcs>;
+    };
+
+On :c:func:`phylink_create`, phylink will use the ``num_possible_pcs``
+value and ``fill_available_pcs`` helper function in
+:c:struct:`phylink_config` to compose the list of available PCS that can be
+used for the phylink instance.
+
+Phylink will then internally handle the selection of the correct PCS for
+the requested interface mode based on the interface modes configured in
+``pcs_interfaces`` in :c:struct:`phylink_config` struct and
+``supported_interfaces`` in :c:struct:`phylink_pcs` struct.
+
+A PCS is considered eligible when the requested interface mode is present
+in both ``pcs_interfaces`` in :c:struct:`phylink_config` struct and
+``supported_interfaces`` in :c:struct:`phylink_pcs` struct.
+
+``supported_interfaces`` describes all interface modes supported by the MAC,
+whereas ``pcs_interfaces`` identifies the subset that require PCS selection.
+
+For the special implementation where the PCS is internal or part of the MAC
+and a dedicated driver is not needed, it's possible to leave the implementation
+of the PCS to the MAC driver and just implement the ``num_possible_pcs``
+value and ``fill_available_pcs`` helper function in
+:c:struct:`phylink_config` referencing the local :c:struct:`phylink_pcs`
+struct allocated from the MAC driver.
+
+Using PCS Devices
+-----------------
+
+It's mandatory to either implement the ``mac_select_pcs`` callback
+of :c:struct:`phylink_mac_ops` or ``num_possible_pcs`` and ``fill_available_pcs``
+of :c:struct:`phylink_config` to use a PCS for a MAC.
+
+The fwnode implementation exposes simple helpers to parse the PCS from
+the fwnode :c:func:`fwnode_phylink_pcs_count` and
+:c:func:`fwnode_phylink_pcs_parse`. The :c:func:`fwnode_phylink_pcs_count` helper
+takes the fwnode where the ``pcs-handle`` should be parsed and returns the
+number of PCS entries described in the fwnode.
+The :c:func:`fwnode_phylink_pcs_parse` helper takes three arguments,
+the fwnode where the ``pcs-handle`` should be parsed, an allocated array
+of :c:struct:`phylink_pcs` pointer where to put the parsed PCS from the fwnode
+and the maximum number of PCS to parse.
+Contrary to :c:func:`fwnode_phylink_pcs_count`, :c:func:`fwnode_phylink_pcs_parse`
+helper fills the allocated array with ONLY the available PCS and returns the
+number of available PCS found. PCS entries that return -ENODEV are skipped and
+are not inserted in the allocated array.
+
+A phylink instance may use multiple PCS devices. The maximum number is reported
+through ``num_possible_pcs``.
+
+It's mandatory to specify for what interface a PCS is needed. This can be done
+by filling the ``pcs_interfaces`` in :c:struct:`phylink_config` struct.
+If the requested interface mode is not present in this bitmask, phylink does
+not search for a PCS for that specific mode (for example, a MAC that doesn't
+need a PCS for SGMII but requires one for USXGMII).
+
+With the use of the :c:func:`fwnode_phylink_pcs_parse` a common implementation
+is the following::
+
+   static int mac_fill_available_pcs(struct phylink_config *config,
+                                     struct phylink_pcs **available_pcs,
+                                     unsigned int num_possible_pcs)
+   {
+      struct device *dev = config->dev;
+
+      return fwnode_phylink_pcs_parse(dev_fwnode(dev), available_pcs,
+                                      num_possible_pcs);
+   }
+
+   static int mac_setup_phylink(struct net_device *netdev)
+   {
+      struct phylink_config *config;
+
+      // ...
+
+      config->dev = &netdev->dev;
+
+      // ...
+
+      // Parse possible PCS and fill num_possible_pcs.
+      config->num_possible_pcs = fwnode_phylink_pcs_count(dev_fwnode(&netdev->dev));
+      config->fill_available_pcs = mac_fill_available_pcs;
+
+      __set_bit(PHY_INTERFACE_MODE_INTERNAL, config->supported_interfaces);
+      __set_bit(PHY_INTERFACE_MODE_SGMII, config->supported_interfaces);
+      __set_bit(PHY_INTERFACE_MODE_1000BASEX, config->supported_interfaces);
+      __set_bit(PHY_INTERFACE_MODE_USXGMII, config->supported_interfaces);
+
+      // PCS required only for USXGMII
+      __set_bit(PHY_INTERFACE_MODE_USXGMII, config->pcs_interfaces);
+
+      phylink = phylink_create(config, //...
+
+It's worth mentioning that it's the phylink code that takes care of allocating
+the array of :c:struct:`phylink_pcs` pointers for the ``fill_available_pcs``
+callback based on the value set in ``num_possible_pcs`` for
+:c:struct:`phylink_config` struct.
+
+The ``fill_available_pcs`` callback must not write more than
+``num_possible_pcs`` entries. The third argument may be used to validate
+that there is enough space to fill all the available PCS in the passed array
+of :c:struct:`phylink_pcs` pointer.
+
+The ``fill_available_pcs`` callback is called only on :c:func:`phylink_create`
+and is used only to compose the initial available PCS list. Ownership of PCS
+is held by phylink and :c:func:`phylink_release_pcs` should be used to release
+them.
+
+Writing PCS Drivers
+-------------------
+
+To write a PCS driver, first implement :c:struct:`phylink_pcs_ops`. Then,
+register your PCS in your probe function using :c:func:`fwnode_pcs_add_provider`.
+The :c:func:`fwnode_pcs_add_provider` takes three arguments, the fwnode where
+the PCS provider should be registered to, a get function to return the requested
+PCS based on ``#pcs-cells`` and a pointer to reference private data for the get
+function.
+
+The PCS will then be registered to a global list of PCS providers that the
+PCS fwnode implementation will use to parse it.
+
+For the simple case where the PCS driver exposes a single PCS,
+:c:func:`fwnode_pcs_simple_get` can be used as the get function.
+
+You must call :c:func:`fwnode_pcs_del_provider` from your remove function and
+release the PCS from any phylink instance under RTNL lock with
+:c:func:`phylink_release_pcs`::
+
+   fwnode_pcs_del_provider(dev_fwnode(&pdev->dev));
+
+   rtnl_lock();
+
+   for (i = 0; i < data->num_port; i++) {
+      struct pcs_port *port = &priv->ports[i];
+
+      phylink_release_pcs(&port->pcs);
+   }
+
+   rtnl_unlock();
+
+Late PCS registration handling
+------------------------------
+
+It's possible that a PCS becomes available after the MAC finished probing.
+Contrary to the usual producer/consumer implementation, when a PCS is not
+registered and can't be found, the fwnode parser helper returns ``-ENODEV``
+instead of ``-EPROBE_DEFER``.
+
+This is to prevent a race condition with particular devices that register
+MAC and PCS with USB or PCIe and require the MAC to be registered before
+the PCS.
+
+The phylink logic correctly handles this special case and keeps the phylink
+instance in a fail condition.
+
+The PCS fwnode implementation provides a notifier to which each phylink
+instance with a non-empty ``pcs_interfaces`` in :c:struct:`phylink_config`
+registers. When a new PCS provider is registered, the notifier is called
+triggering the :c:func:`pcs_provider_notify` function.
+
+Function :c:func:`pcs_provider_notify` will check if the just added PCS
+should be used by the phylink instance. If it should be used then,
+it's added to the internal list of available PCS and a phylink major
+config is forced.
+
+If a phylink instance was in a failure state, with the just added PCS
+now part of the available PCS internal phylink list, provided all other
+conditions are satisfied, the configuration is retried and the failure
+condition is cleared.
+
+API Reference
+-------------
+
+.. kernel-doc:: include/linux/phylink.h
+   :identifiers: phylink_pcs
+
+.. kernel-doc:: include/linux/pcs/pcs.h
+   :internal:
+
+.. kernel-doc:: include/linux/pcs/pcs-provider.h
+   :internal:
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH v1 03/11] KVM: arm64: Use guard()/scoped_guard() in arm64 KVM EL1 code
From: Fuad Tabba @ 2026-06-15 13:17 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <ai_3Hlf8CakQ7ESQ@google.com>

On Mon, 15 Jun 2026 at 13:59, Vincent Donnefort <vdonnefort@google.com> wrote:
>
> On Fri, Jun 12, 2026 at 07:59:17AM +0100, tabba@google.com wrote:
> > Convert the manual mutex_lock()/spin_lock() pairs in
> > arch/arm64/kvm/{pkvm,arm,mmu,reset,psci}.c to guard(mutex),
> > guard(spinlock) and scoped_guard(), dropping unlock-only goto labels in
> > favour of direct returns. Centralised cleanup gotos that still serve
> > other resources are preserved.
> >
> > reset.c uses scoped_guard() rather than guard() so the lock covers only
> > the small read/update window inside kvm_reset_vcpu(), leaving the rest
> > of the function outside the critical section.
>
> I believe in that case unless it really helps with cleaning resources, there's
> not much point using scoped_guard().
>
> I would keep it as is.

Ack.

>
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/kvm/arm.c   | 14 +++-----
> >  arch/arm64/kvm/mmu.c   | 80 +++++++++++++++---------------------------
> >  arch/arm64/kvm/pkvm.c  | 26 ++++++--------
> >  arch/arm64/kvm/psci.c  | 17 ++++-----
> >  arch/arm64/kvm/reset.c |  8 ++---
> >  5 files changed, 53 insertions(+), 92 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9453321ef8c6..c9f36932c980 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -793,9 +793,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
> >  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
> >                                   struct kvm_mp_state *mp_state)
> >  {
> > -     int ret = 0;
> > -
> > -     spin_lock(&vcpu->arch.mp_state_lock);
> > +     guard(spinlock)(&vcpu->arch.mp_state_lock);
> >
> >       switch (mp_state->mp_state) {
> >       case KVM_MP_STATE_RUNNABLE:
> > @@ -808,12 +806,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
> >               kvm_arm_vcpu_suspend(vcpu);
> >               break;
> >       default:
> > -             ret = -EINVAL;
> > +             return -EINVAL;
> >       }
> >
> > -     spin_unlock(&vcpu->arch.mp_state_lock);
> > -
> > -     return ret;
> > +     return 0;
> >  }
> >
> >  /**
> > @@ -1726,15 +1722,13 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
> >       /*
> >        * Handle the "start in power-off" case.
> >        */
> > -     spin_lock(&vcpu->arch.mp_state_lock);
> > +     guard(spinlock)(&vcpu->arch.mp_state_lock);
> >
> >       if (power_off)
> >               __kvm_arm_vcpu_power_off(vcpu);
> >       else
> >               WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
> >
> > -     spin_unlock(&vcpu->arch.mp_state_lock);
> > -
> >       return 0;
> >  }
> >
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 4da9281312eb..d18f4ce7ceae 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -391,13 +391,13 @@ static void stage2_flush_vm(struct kvm *kvm)
> >   */
> >  void __init free_hyp_pgds(void)
> >  {
> > -     mutex_lock(&kvm_hyp_pgd_mutex);
> > -     if (hyp_pgtable) {
> > -             kvm_pgtable_hyp_destroy(hyp_pgtable);
> > -             kfree(hyp_pgtable);
> > -             hyp_pgtable = NULL;
> > -     }
> > -     mutex_unlock(&kvm_hyp_pgd_mutex);
> > +     guard(mutex)(&kvm_hyp_pgd_mutex);
> > +     if (!hyp_pgtable)
> > +             return;
> > +
> > +     kvm_pgtable_hyp_destroy(hyp_pgtable);
> > +     kfree(hyp_pgtable);
> > +     hyp_pgtable = NULL;
> >  }
> >
> >  static bool kvm_host_owns_hyp_mappings(void)
> > @@ -424,16 +424,11 @@ static bool kvm_host_owns_hyp_mappings(void)
> >  int __create_hyp_mappings(unsigned long start, unsigned long size,
> >                         unsigned long phys, enum kvm_pgtable_prot prot)
> >  {
> > -     int err;
> > -
> >       if (WARN_ON(!kvm_host_owns_hyp_mappings()))
> >               return -EINVAL;
> >
> > -     mutex_lock(&kvm_hyp_pgd_mutex);
> > -     err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
> > -     mutex_unlock(&kvm_hyp_pgd_mutex);
> > -
> > -     return err;
> > +     guard(mutex)(&kvm_hyp_pgd_mutex);
> > +     return kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
> >  }
> >
> >  static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
> > @@ -481,56 +476,42 @@ static int share_pfn_hyp(u64 pfn)
> >  {
> >       struct rb_node **node, *parent;
> >       struct hyp_shared_pfn *this;
> > -     int ret = 0;
> >
> > -     mutex_lock(&hyp_shared_pfns_lock);
> > +     guard(mutex)(&hyp_shared_pfns_lock);
> >       this = find_shared_pfn(pfn, &node, &parent);
> >       if (this) {
> >               this->count++;
> > -             goto unlock;
> > +             return 0;
> >       }
> >
> >       this = kzalloc_obj(*this);
> > -     if (!this) {
> > -             ret = -ENOMEM;
> > -             goto unlock;
> > -     }
> > +     if (!this)
> > +             return -ENOMEM;
> >
> >       this->pfn = pfn;
> >       this->count = 1;
> >       rb_link_node(&this->node, parent, node);
> >       rb_insert_color(&this->node, &hyp_shared_pfns);
> > -     ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn);
> > -unlock:
> > -     mutex_unlock(&hyp_shared_pfns_lock);
> > -
> > -     return ret;
> > +     return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn);
> >  }
> >
> >  static int unshare_pfn_hyp(u64 pfn)
> >  {
> >       struct rb_node **node, *parent;
> >       struct hyp_shared_pfn *this;
> > -     int ret = 0;
> >
> > -     mutex_lock(&hyp_shared_pfns_lock);
> > +     guard(mutex)(&hyp_shared_pfns_lock);
> >       this = find_shared_pfn(pfn, &node, &parent);
> > -     if (WARN_ON(!this)) {
> > -             ret = -ENOENT;
> > -             goto unlock;
> > -     }
> > +     if (WARN_ON(!this))
> > +             return -ENOENT;
> >
> >       this->count--;
> >       if (this->count)
> > -             goto unlock;
> > +             return 0;
> >
> >       rb_erase(&this->node, &hyp_shared_pfns);
> >       kfree(this);
> > -     ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn);
> > -unlock:
> > -     mutex_unlock(&hyp_shared_pfns_lock);
> > -
> > -     return ret;
> > +     return kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn);
> >  }
> >
> >  int kvm_share_hyp(void *from, void *to)
> > @@ -655,7 +636,7 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
> >       unsigned long base;
> >       int ret = 0;
> >
> > -     mutex_lock(&kvm_hyp_pgd_mutex);
> > +     guard(mutex)(&kvm_hyp_pgd_mutex);
> >
> >       /*
> >        * This assumes that we have enough space below the idmap
> > @@ -670,8 +651,6 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
> >       base = io_map_base - size;
> >       ret = __hyp_alloc_private_va_range(base);
> >
> > -     mutex_unlock(&kvm_hyp_pgd_mutex);
> > -
> >       if (!ret)
> >               *haddr = base;
> >
> > @@ -714,17 +693,16 @@ int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
> >       size_t size;
> >       int ret;
> >
> > -     mutex_lock(&kvm_hyp_pgd_mutex);
> > -     /*
> > -      * Efficient stack verification using the NVHE_STACK_SHIFT bit implies
> > -      * an alignment of our allocation on the order of the size.
> > -      */
> > -     size = NVHE_STACK_SIZE * 2;
> > -     base = ALIGN_DOWN(io_map_base - size, size);
> > +     scoped_guard(mutex, &kvm_hyp_pgd_mutex) {
> > +             /*
> > +              * Efficient stack verification using the NVHE_STACK_SHIFT bit implies
> > +              * an alignment of our allocation on the order of the size.
> > +              */
> > +             size = NVHE_STACK_SIZE * 2;
> > +             base = ALIGN_DOWN(io_map_base - size, size);
> >
> > -     ret = __hyp_alloc_private_va_range(base);
> > -
> > -     mutex_unlock(&kvm_hyp_pgd_mutex);
> > +             ret = __hyp_alloc_private_va_range(base);
> > +     }
>
> Not sure about that one, it's not shorter, doesn't remove any label but add
> a tab.

Ack.


>
> >
> >       if (ret) {
> >               kvm_err("Cannot allocate hyp stack guard page\n");
> > diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> > index 053e4f733e4b..a39111b70f9f 100644
> > --- a/arch/arm64/kvm/pkvm.c
> > +++ b/arch/arm64/kvm/pkvm.c
> > @@ -190,39 +190,33 @@ bool pkvm_hyp_vm_is_created(struct kvm *kvm)
> >
> >  int pkvm_create_hyp_vm(struct kvm *kvm)
> >  {
> > -     int ret = 0;
> > -
> >       /*
> >        * Synchronise with kvm_arch_prepare_memory_region(), as we
> >        * prevent memslot modifications on a pVM that has been run.
> >        */
> > -     mutex_lock(&kvm->slots_lock);
> > -     mutex_lock(&kvm->arch.config_lock);
> > -     if (!pkvm_hyp_vm_is_created(kvm))
> > -             ret = __pkvm_create_hyp_vm(kvm);
> > -     mutex_unlock(&kvm->arch.config_lock);
> > -     mutex_unlock(&kvm->slots_lock);
> > +     guard(mutex)(&kvm->slots_lock);
> > +     guard(mutex)(&kvm->arch.config_lock);
> >
> > -     return ret;
> > +     if (!pkvm_hyp_vm_is_created(kvm))
> > +             return __pkvm_create_hyp_vm(kvm);
> > +
> > +     return 0;
> >  }
> >
> >  int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
> >  {
> > -     int ret = 0;
> > +     guard(mutex)(&vcpu->kvm->arch.config_lock);
> >
> > -     mutex_lock(&vcpu->kvm->arch.config_lock);
> >       if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
> > -             ret = __pkvm_create_hyp_vcpu(vcpu);
> > -     mutex_unlock(&vcpu->kvm->arch.config_lock);
> > +             return __pkvm_create_hyp_vcpu(vcpu);
> >
> > -     return ret;
> > +     return 0;
> >  }
> >
> >  void pkvm_destroy_hyp_vm(struct kvm *kvm)
> >  {
> > -     mutex_lock(&kvm->arch.config_lock);
> > +     guard(mutex)(&kvm->arch.config_lock);
> >       __pkvm_destroy_hyp_vm(kvm);
> > -     mutex_unlock(&kvm->arch.config_lock);
> >  }
> >
> >  int pkvm_init_host_vm(struct kvm *kvm, unsigned long type)
> > diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
> > index 3b5dbe9a0a0e..e1389c525e9d 100644
> > --- a/arch/arm64/kvm/psci.c
> > +++ b/arch/arm64/kvm/psci.c
> > @@ -62,7 +62,6 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
> >       struct vcpu_reset_state *reset_state;
> >       struct kvm *kvm = source_vcpu->kvm;
> >       struct kvm_vcpu *vcpu = NULL;
> > -     int ret = PSCI_RET_SUCCESS;
> >       unsigned long cpu_id;
> >
> >       cpu_id = smccc_get_arg1(source_vcpu);
> > @@ -78,14 +77,13 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
> >       if (!vcpu)
> >               return PSCI_RET_INVALID_PARAMS;
> >
> > -     spin_lock(&vcpu->arch.mp_state_lock);
> > +     guard(spinlock)(&vcpu->arch.mp_state_lock);
> > +
> >       if (!kvm_arm_vcpu_stopped(vcpu)) {
> >               if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
> > -                     ret = PSCI_RET_ALREADY_ON;
> > +                     return PSCI_RET_ALREADY_ON;
> >               else
> > -                     ret = PSCI_RET_INVALID_PARAMS;
> > -
> > -             goto out_unlock;
> > +                     return PSCI_RET_INVALID_PARAMS;
> >       }
> >
> >       reset_state = &vcpu->arch.reset_state;
> > @@ -113,9 +111,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
> >       WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
> >       kvm_vcpu_wake_up(vcpu);
> >
> > -out_unlock:
> > -     spin_unlock(&vcpu->arch.mp_state_lock);
> > -     return ret;
> > +     return PSCI_RET_SUCCESS;
> >  }
> >
> >  static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
> > @@ -176,9 +172,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
> >        * re-initialized.
> >        */
> >       kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
> > -             spin_lock(&tmp->arch.mp_state_lock);
> > +             guard(spinlock)(&tmp->arch.mp_state_lock);
> >               WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
> > -             spin_unlock(&tmp->arch.mp_state_lock);
> >       }
> >       kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
> >
> > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> > index b963fd975aac..60969d90bdd3 100644
> > --- a/arch/arm64/kvm/reset.c
> > +++ b/arch/arm64/kvm/reset.c
> > @@ -193,10 +193,10 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >       bool loaded;
> >       u32 pstate;
> >
> > -     spin_lock(&vcpu->arch.mp_state_lock);
> > -     reset_state = vcpu->arch.reset_state;
> > -     vcpu->arch.reset_state.reset = false;
> > -     spin_unlock(&vcpu->arch.mp_state_lock);
> > +     scoped_guard(spinlock, &vcpu->arch.mp_state_lock) {
> > +             reset_state = vcpu->arch.reset_state;
> > +             vcpu->arch.reset_state.reset = false;
> > +     }
>
> Same, I don't find this one really interesting.

Ack.

Thanks!
/fuad

>
> >
> >       preempt_disable();
> >       loaded = (vcpu->cpu != -1);
> > --
> > 2.54.0.1136.gdb2ca164c4-goog
> >


^ permalink raw reply

* Re: [PATCH v1 06/11] KVM: arm64: Factor out reusable vCPU reset helpers
From: Vincent Donnefort @ 2026-06-15 13:16 UTC (permalink / raw)
  To: tabba
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <20260612065925.755562-7-tabba@google.com>

On Fri, Jun 12, 2026 at 07:59:20AM +0100, tabba@google.com wrote:
> Pull the reusable pieces out of kvm_reset_vcpu(): expose the reset
> PSTATE values in kvm_arm.h, and split the core register reset and the
> PSCI-driven reset into kvm_reset_vcpu_core() and kvm_reset_vcpu_psci().
> A follow-up series reuses these to reset protected vCPUs at EL2.
> 
> No functional change intended.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  arch/arm64/include/asm/kvm_arm.h     | 12 ++++++
>  arch/arm64/include/asm/kvm_emulate.h | 58 +++++++++++++++++++++++++++
>  arch/arm64/kvm/reset.c               | 60 ++--------------------------
>  3 files changed, 73 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3f9233b5a130..aba4ec09acd2 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -348,4 +348,16 @@
>  	{ PSR_AA32_MODE_UND,	"32-bit UND" },	\
>  	{ PSR_AA32_MODE_SYS,	"32-bit SYS" }
>  
> +/*
> + * ARMv8 Reset Values
> + */
> +#define VCPU_RESET_PSTATE_EL1	(PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
> +				 PSR_F_BIT | PSR_D_BIT)
> +
> +#define VCPU_RESET_PSTATE_EL2	(PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
> +				 PSR_F_BIT | PSR_D_BIT)
> +
> +#define VCPU_RESET_PSTATE_SVC	(PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
> +				 PSR_AA32_I_BIT | PSR_AA32_F_BIT)
> +
>  #endif /* __ARM64_KVM_ARM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index aed9fc0b717b..8436e71c402d 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -704,4 +704,62 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
>  			vcpu->arch.hcrx_el2 |= HCRX_EL2_EnASR;
>  	}
>  }
> +
> +/* Reset a vcpu's core registers. */
> +static inline void kvm_reset_vcpu_core(struct kvm_vcpu *vcpu)
> +{
> +	u32 pstate;
> +
> +	if (vcpu_el1_is_32bit(vcpu)) {
> +		pstate = VCPU_RESET_PSTATE_SVC;
> +	} else if (vcpu_has_nv(vcpu)) {
> +		pstate = VCPU_RESET_PSTATE_EL2;
> +	} else {
> +		pstate = VCPU_RESET_PSTATE_EL1;
> +	}

nit: no braces here; actually, there were none before.

> +
> +	/* Reset core registers */
> +	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
> +	memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
> +	vcpu->arch.ctxt.spsr_abt = 0;
> +	vcpu->arch.ctxt.spsr_und = 0;
> +	vcpu->arch.ctxt.spsr_irq = 0;
> +	vcpu->arch.ctxt.spsr_fiq = 0;
> +	vcpu_gp_regs(vcpu)->pstate = pstate;
> +}
> +
> +/* PSCI reset handling for a vcpu. */
> +static inline void kvm_reset_vcpu_psci(struct kvm_vcpu *vcpu,
> +				       struct vcpu_reset_state *reset_state)
> +{
> +	unsigned long target_pc = reset_state->pc;
> +
> +	/* Gracefully handle Thumb2 entry point */
> +	if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
> +		target_pc &= ~1UL;
> +		vcpu_set_thumb(vcpu);
> +	}
> +
> +	/* Propagate caller endianness */
> +	if (reset_state->be)
> +		kvm_vcpu_set_be(vcpu);
> +
> +	*vcpu_pc(vcpu) = target_pc;
> +
> +	/*
> +	 * We may come from a state where either a PC update was
> +	 * pending (SMC call resulting in PC being increpented to
> +	 * skip the SMC) or a pending exception. Make sure we get
> +	 * rid of all that, as this cannot be valid out of reset.
> +	 *
> +	 * Note that clearing the exception mask also clears PC
> +	 * updates, but that's an implementation detail, and we
> +	 * really want to make it explicit.
> +	 */
> +	vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
> +	vcpu_clear_flag(vcpu, EXCEPT_MASK);
> +	vcpu_clear_flag(vcpu, INCREMENT_PC);
> +	vcpu_set_reg(vcpu, 0, reset_state->r0);
> +}
> +
>  #endif /* __ARM64_KVM_EMULATE_H__ */
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 60969d90bdd3..e22d0be9e57c 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -34,18 +34,6 @@
>  static u32 __ro_after_init kvm_ipa_limit;
>  unsigned int __ro_after_init kvm_host_sve_max_vl;
>  
> -/*
> - * ARMv8 Reset Values
> - */
> -#define VCPU_RESET_PSTATE_EL1	(PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
> -				 PSR_F_BIT | PSR_D_BIT)
> -
> -#define VCPU_RESET_PSTATE_EL2	(PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
> -				 PSR_F_BIT | PSR_D_BIT)
> -
> -#define VCPU_RESET_PSTATE_SVC	(PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
> -				 PSR_AA32_I_BIT | PSR_AA32_F_BIT)
> -
>  unsigned int __ro_after_init kvm_sve_max_vl;
>  
>  int __init kvm_arm_init_sve(void)
> @@ -191,7 +179,6 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_reset_state reset_state;
>  	bool loaded;
> -	u32 pstate;
>  
>  	scoped_guard(spinlock, &vcpu->arch.mp_state_lock) {
>  		reset_state = vcpu->arch.reset_state;
> @@ -210,21 +197,8 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  		kvm_vcpu_reset_sve(vcpu);
>  	}
>  
> -	if (vcpu_el1_is_32bit(vcpu))
> -		pstate = VCPU_RESET_PSTATE_SVC;
> -	else if (vcpu_has_nv(vcpu))
> -		pstate = VCPU_RESET_PSTATE_EL2;
> -	else
> -		pstate = VCPU_RESET_PSTATE_EL1;
> -
>  	/* Reset core registers */
> -	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
> -	memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
> -	vcpu->arch.ctxt.spsr_abt = 0;
> -	vcpu->arch.ctxt.spsr_und = 0;
> -	vcpu->arch.ctxt.spsr_irq = 0;
> -	vcpu->arch.ctxt.spsr_fiq = 0;
> -	vcpu_gp_regs(vcpu)->pstate = pstate;
> +	kvm_reset_vcpu_core(vcpu);
>  
>  	/* Reset system registers */
>  	kvm_reset_sys_regs(vcpu);
> @@ -233,36 +207,8 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  	 * Additional reset state handling that PSCI may have imposed on us.
>  	 * Must be done after all the sys_reg reset.
>  	 */
> -	if (reset_state.reset) {
> -		unsigned long target_pc = reset_state.pc;
> -
> -		/* Gracefully handle Thumb2 entry point */
> -		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
> -			target_pc &= ~1UL;
> -			vcpu_set_thumb(vcpu);
> -		}
> -
> -		/* Propagate caller endianness */
> -		if (reset_state.be)
> -			kvm_vcpu_set_be(vcpu);
> -
> -		*vcpu_pc(vcpu) = target_pc;
> -
> -		/*
> -		 * We may come from a state where either a PC update was
> -		 * pending (SMC call resulting in PC being increpented to
> -		 * skip the SMC) or a pending exception. Make sure we get
> -		 * rid of all that, as this cannot be valid out of reset.
> -		 *
> -		 * Note that clearing the exception mask also clears PC
> -		 * updates, but that's an implementation detail, and we
> -		 * really want to make it explicit.
> -		 */
> -		vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
> -		vcpu_clear_flag(vcpu, EXCEPT_MASK);
> -		vcpu_clear_flag(vcpu, INCREMENT_PC);
> -		vcpu_set_reg(vcpu, 0, reset_state.r0);
> -	}
> +	if (reset_state.reset)
> +		kvm_reset_vcpu_psci(vcpu, &reset_state);
>  
>  	/* Reset timer */
>  	kvm_timer_vcpu_reset(vcpu);
> -- 
> 2.54.0.1136.gdb2ca164c4-goog
> 


^ permalink raw reply

* Re: [PATCH v3 2/3] drivers/firmware: add SDEI cross-CPU NMI service for arm64
From: Kiryl Shutsemau @ 2026-06-15 13:15 UTC (permalink / raw)
  To: Puranjay Mohan
  Cc: Catalin Marinas, Will Deacon, James Morse, Mark Rutland,
	Marc Zyngier, Doug Anderson, Petr Mladek, Thomas Gleixner,
	Andrew Morton, Baoquan He, Usama Arif, Breno Leitao,
	Julien Thierry, Lecopzer Chen, Sumit Garg, kernel-team, kexec,
	linux-arm-kernel, linux-kernel
In-Reply-To: <CANk7y0iBTYuhzLTJAX0yf8Cp8cyThOVpAQYMTDN9LZUKGThqJQ@mail.gmail.com>

On Mon, Jun 15, 2026 at 12:18:10PM +0200, Puranjay Mohan wrote:
> On Mon, Jun 15, 2026 at 4:35 AM Kiryl Shutsemau <kirill@shutemov.name> wrote:
> >
> > From: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
> >
> > Deliver an NMI-like event to an interrupt-masked arm64 CPU via the
> > standard SDEI software-signalled event (event 0), without the pseudo-NMI
> > hot-path cost: register a handler for event 0 and poke a target with
> > sdei_event_signal(0, mpidr).
> >
> > First user is arch_trigger_cpumask_backtrace() (sysrq-l, RCU stalls,
> > hung-task/soft-lockup dumps), which otherwise rides an IPI that can't
> > reach a masked CPU. Falls back to the IPI path when SDEI is absent; no
> > watchdog backend yet, so the stock detector is untouched.
> >
> > Signed-off-by: Kiryl Shutsemau (Meta) <kas@kernel.org>
> > Reviewed-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >  MAINTAINERS                     |   2 +-
> >  arch/arm64/include/asm/nmi.h    |  24 +++++
> >  arch/arm64/kernel/smp.c         |  11 +++
> >  drivers/firmware/Kconfig        |  19 ++++
> >  drivers/firmware/Makefile       |   1 +
> >  drivers/firmware/arm_sdei_nmi.c | 149 ++++++++++++++++++++++++++++++++
> >  6 files changed, 205 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/arm64/include/asm/nmi.h
> >  create mode 100644 drivers/firmware/arm_sdei_nmi.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index c8d4b913f26c..b5ddfb85dce9 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -24797,7 +24797,7 @@ M:      James Morse <james.morse@arm.com>
> >  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
> >  S:     Maintained
> >  F:     Documentation/devicetree/bindings/arm/firmware/sdei.txt
> > -F:     drivers/firmware/arm_sdei.c
> > +F:     drivers/firmware/arm_sdei*
> >  F:     include/linux/arm_sdei.h
> >  F:     include/uapi/linux/arm_sdei.h
> >
> > diff --git a/arch/arm64/include/asm/nmi.h b/arch/arm64/include/asm/nmi.h
> > new file mode 100644
> > index 000000000000..9366be419d18
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/nmi.h
> > @@ -0,0 +1,24 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __ASM_NMI_H
> > +#define __ASM_NMI_H
> > +
> > +#include <linux/cpumask.h>
> > +
> > +/*
> > + * Cross-CPU NMI provider hooks, consulted by the arm64 arch code before
> > + * its regular-IRQ / pseudo-NMI IPI paths. The SDEI provider in
> > + * drivers/firmware/arm_sdei_nmi.c implements them when active; a future
> > + * FEAT_NMI provider could slot in here too. The stubs let callers stay
> > + * unconditional when ARM_SDEI_NMI is off.
> > + */
> > +#ifdef CONFIG_ARM_SDEI_NMI
> > +bool sdei_nmi_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
> > +#else
> > +static inline bool sdei_nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> > +                                                     int exclude_cpu)
> > +{
> > +       return false;
> > +}
> > +#endif
> > +
> > +#endif /* __ASM_NMI_H */
> > diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> > index 1aa324104afb..a670434a8cae 100644
> > --- a/arch/arm64/kernel/smp.c
> > +++ b/arch/arm64/kernel/smp.c
> > @@ -45,6 +45,7 @@
> >  #include <asm/daifflags.h>
> >  #include <asm/kvm_mmu.h>
> >  #include <asm/mmu_context.h>
> > +#include <asm/nmi.h>
> >  #include <asm/numa.h>
> >  #include <asm/processor.h>
> >  #include <asm/smp_plat.h>
> > @@ -927,6 +928,16 @@ static void arm64_backtrace_ipi(cpumask_t *mask)
> >
> >  void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> >  {
> > +       /*
> > +        * Prefer the SDEI cross-CPU NMI provider when active: firmware
> > +        * dispatches the event out of EL3 and reaches CPUs that have
> > +        * interrupts locally masked, without the per-IRQ-mask cost that
> > +        * pseudo-NMI pays for the same reach. The plain IPI path below
> > +        * can't reach such a CPU unless pseudo-NMI is enabled.
> > +        */
> > +       if (sdei_nmi_trigger_cpumask_backtrace(mask, exclude_cpu))
> > +               return;
> > +
> >         /*
> >          * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
> >          * nothing about it truly needs to be implemented using an NMI, it's
> > diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> > index bbd2155d8483..6501087ff90d 100644
> > --- a/drivers/firmware/Kconfig
> > +++ b/drivers/firmware/Kconfig
> > @@ -36,6 +36,25 @@ config ARM_SDE_INTERFACE
> >           standard for registering callbacks from the platform firmware
> >           into the OS. This is typically used to implement RAS notifications.
> >
> > +config ARM_SDEI_NMI
> > +       bool "SDEI-based cross-CPU NMI service (arm64)"
> > +       depends on ARM64 && ARM_SDE_INTERFACE
> > +       help
> > +         Provides SDEI-based cross-CPU NMI delivery for hooks that need
> > +         to reach interrupt-masked CPUs on silicon that lacks FEAT_NMI:
> > +
> > +           - arch_trigger_cpumask_backtrace()  (sysrq-l, RCU stalls,
> > +             hardlockup_all_cpu_backtrace, soft-lockup secondary dumps,
> > +             hung-task auxiliary dumps)
> > +
> > +         The driver registers a handler for the SDEI software-signalled
> > +         event (event 0) and reaches a target CPU by signalling it with
> > +         SDEI_EVENT_SIGNAL. Firmware delivers the event out of EL3
> > +         regardless of the target's PSTATE.DAIF -- forced delivery into a
> > +         CPU wedged with interrupts locally masked.
> > +
> > +         If unsure, say N.
> > +
> >  config EDD
> >         tristate "BIOS Enhanced Disk Drive calls determine boot disk"
> >         depends on X86
> > diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
> > index 4ddec2820c96..be46f1e1dc77 100644
> > --- a/drivers/firmware/Makefile
> > +++ b/drivers/firmware/Makefile
> > @@ -4,6 +4,7 @@
> >  #
> >  obj-$(CONFIG_ARM_SCPI_PROTOCOL)        += arm_scpi.o
> >  obj-$(CONFIG_ARM_SDE_INTERFACE)        += arm_sdei.o
> > +obj-$(CONFIG_ARM_SDEI_NMI)     += arm_sdei_nmi.o
> >  obj-$(CONFIG_DMI)              += dmi_scan.o
> >  obj-$(CONFIG_DMI_SYSFS)                += dmi-sysfs.o
> >  obj-$(CONFIG_EDD)              += edd.o
> > diff --git a/drivers/firmware/arm_sdei_nmi.c b/drivers/firmware/arm_sdei_nmi.c
> > new file mode 100644
> > index 000000000000..a82776e7b55a
> > --- /dev/null
> > +++ b/drivers/firmware/arm_sdei_nmi.c
> > @@ -0,0 +1,149 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * arm64 SDEI-based cross-CPU NMI service.
> > + *
> > + * Delivering an "NMI-shaped" event to an EL1 context that has locally
> > + * masked interrupts, on silicon without FEAT_NMI, can be done two ways:
> > + *
> > + *   - pseudo-NMI: mask "interrupts" via the GIC priority register
> > + *     (ICC_PMR_EL1) instead of PSTATE.DAIF, leaving a high-priority band
> > + *     deliverable. Functionally this works -- but it reimplements every
> > + *     local_irq_disable()/enable() and exception entry/exit as a PMR
> > + *     write plus synchronisation, a cost paid on that hot path forever,
> > + *     whether or not an NMI is ever delivered.
> > + *
> > + *   - SDEI: leave interrupt masking as the cheap PSTATE.DAIF operation
> > + *     and have the firmware bounce an EL3-routed Group-0 SGI back to
> > + *     NS-EL1 as an event callback. The cost is a firmware round-trip,
> > + *     but only at the rare moment delivery is actually needed.
> > + *
> > + * This driver takes the second path: it keeps the IRQ-mask hot path
> > + * free and pays only when it fires, which is what makes cross-CPU NMI
> > + * affordable on hardware where the pseudo-NMI tax isn't, until FEAT_NMI
> > + * makes NMI masking cheap in the architecture itself.
> > + *
> > + * Capabilities provided:
> > + *
> > + *   - sdei_nmi_trigger_cpumask_backtrace() — override for arm64's
> > + *     arch_trigger_cpumask_backtrace(), so sysrq-l, RCU stall dumps,
> > + *     hardlockup_all_cpu_backtrace, soft-lockup/hung-task secondary
> > + *     dumps all reach interrupt-masked CPUs.
> > + *
> > + * Delivery uses the standard SDEI software-signalled event (event 0) and
> > + * SDEI_EVENT_SIGNAL. We register a handler for event 0, enable it, and
> > + * poke a target CPU with sdei_event_signal(0, mpidr): firmware makes
> > + * event 0 pending on that PE and dispatches the handler NMI-like,
> > + * regardless of the target's DAIF.
> > + * Availability is simply whether event 0 registers and enables -- if SDEI
> > + * and its software-signalled event are present we use it, otherwise the
> > + * driver stays inert.
> > + */
> > +
> > +#define pr_fmt(fmt) "sdei_nmi: " fmt
> > +
> > +#include <linux/arm_sdei.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/kprobes.h>
> > +#include <linux/nmi.h>
> > +#include <linux/printk.h>
> > +#include <linux/ptrace.h>
> > +#include <linux/smp.h>
> > +#include <linux/types.h>
> > +
> > +#include <asm/nmi.h>
> > +#include <asm/smp_plat.h>
> > +
> > +static bool sdei_nmi_available;
> > +
> > +#define SDEI_NMI_EVENT                 0
> > +
> > +static int sdei_nmi_handler(u32 event, struct pt_regs *regs, void *arg)
> > +{
> > +       /*
> > +        * nmi_cpu_backtrace() no-ops unless this CPU's bit is set in the
> > +        * global backtrace mask (driven by nmi_trigger_cpumask_backtrace()),
> > +        * so a fire that reaches a CPU not being backtraced is harmless.
> > +        */
> > +       nmi_cpu_backtrace(regs);
> > +       return SDEI_EV_HANDLED;
> > +}
> > +NOKPROBE_SYMBOL(sdei_nmi_handler);
> > +
> > +static void sdei_nmi_fire(unsigned int target_cpu)
> > +{
> > +       int err = sdei_event_signal(SDEI_NMI_EVENT, cpu_logical_map(target_cpu));
> > +
> > +       if (err)
> > +               pr_warn("SDEI_EVENT_SIGNAL to CPU %u failed: %d\n",
> > +                       target_cpu, err);
> > +}
> > +
> > +/*
> > + * Raise callback for nmi_trigger_cpumask_backtrace(): signal event 0
> > + * at every CPU still pending in @mask. The framework excludes the local
> > + * CPU from @mask before calling us.
> > + */
> > +static void sdei_nmi_raise_backtrace(cpumask_t *mask)
> > +{
> > +       unsigned int cpu;
> > +
> > +       for_each_cpu(cpu, mask)
> > +               sdei_nmi_fire(cpu);
> > +}
> > +
> > +/*
> > + * Override hook for arch_trigger_cpumask_backtrace() (see
> > + * arch/arm64/kernel/smp.c). Returns true when SDEI handled the request,
> > + * which is the case whenever SDEI is active; on a false return the arch
> > + * falls back to its regular-IRQ (or pseudo-NMI, if enabled) IPI.
> > + *
> > + * On a kernel built without paying the pseudo-NMI hot-path cost (the
> > + * usual case for this driver's target), the IPI can't reach a CPU that
> > + * has interrupts masked -- so the backtrace of the one CPU you care
> > + * about comes back empty. SDEI is dispatched out of EL3 and lands
> > + * regardless of the target's DAIF, without taxing the IRQ-mask path.
> > + */
> > +bool sdei_nmi_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> > +{
> > +       if (!sdei_nmi_available)
> > +               return false;
> > +
> > +       nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
> > +                                     sdei_nmi_raise_backtrace);
> > +       return true;
> > +}
> > +
> > +/*
> > + * device_initcall (after arch_initcall(sdei_init), so the SDEI subsystem
> > + * is up): probe the firmware, register the event, and turn on the
> > + * cross-CPU service. If the probe fails the driver stays inert and the
> > + * override hooks decline, leaving the arch's own paths in place.
> > + */
> > +static int __init sdei_nmi_init(void)
> > +{
> > +       int err;
> > +
> > +       err = sdei_event_register(SDEI_NMI_EVENT, sdei_nmi_handler, NULL);
> > +       if (err) {
> > +               pr_err("sdei_event_register(%u) failed: %d\n",
> > +                      SDEI_NMI_EVENT, err);
> > +               return 0;
> > +       }
> 
> This initcall runs unconditionally whenever ARM_SDEI_NMI is built in,
> which includes the many arm64 systems that have no SDEI at all. On
> those, sdei_event_register() -> sdei_event_create() ->
> invoke_sdei_fn() returns -EIO, and the core already complains:
>     pr_warn("Failed to create event %u: %d\n", event_num, err);

Fair enough. I will add sdei_is_present() and gate sdei_nmi_init() on
it.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov


^ permalink raw reply

* Re: [PATCH v1 02/11] KVM: arm64: Use guard(hyp_spinlock) in pKVM hypervisor code
From: Fuad Tabba @ 2026-06-15 13:11 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <ai_1z13TyEXbHx4Q@google.com>

Hi Vincent,

On Mon, 15 Jun 2026 at 13:53, Vincent Donnefort <vdonnefort@google.com> wrote:
>
> On Fri, Jun 12, 2026 at 07:59:16AM +0100, tabba@google.com wrote:
> > Convert the manual hyp_spin_lock()/hyp_spin_unlock() pairs in
> > arch/arm64/kvm/hyp/nvhe/{pkvm,mm,page_alloc,ffa}.c to
> > guard(hyp_spinlock) and scoped_guard(hyp_spinlock), dropping several
> > unlock-only goto labels in favour of direct returns.
> >
> > hyp_fixblock_lock in mm.c is left as an explicit lock/unlock pair: it is
> > acquired in hyp_fixblock_map() and released in hyp_fixblock_unmap(), so
> > its critical section spans two functions and cannot be expressed as a
> > single lexical scope.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/kvm/hyp/nvhe/ffa.c        | 154 +++++++++++----------------
> >  arch/arm64/kvm/hyp/nvhe/mm.c         |  37 ++-----
> >  arch/arm64/kvm/hyp/nvhe/page_alloc.c |  13 +--
> >  arch/arm64/kvm/hyp/nvhe/pkvm.c       |  86 +++++----------
> >  4 files changed, 105 insertions(+), 185 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > index 1af722771178..46cd4fa924be 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > @@ -313,17 +313,16 @@ static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res,
> >                             struct kvm_cpu_context *ctxt)
> >  {
> >       DECLARE_REG(u32, id, ctxt, 1);
> > -     int ret = 0;
> >
> >       if (id != HOST_FFA_ID) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> > -     hyp_spin_lock(&host_buffers.lock);
> > +     guard(hyp_spinlock)(&host_buffers.lock);
> >       if (!host_buffers.tx) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       hyp_unpin_shared_mem(host_buffers.tx, host_buffers.tx + 1);
> > @@ -336,10 +335,7 @@ static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res,
> >
> >       ffa_unmap_hyp_buffers();
> >
> > -out_unlock:
> > -     hyp_spin_unlock(&host_buffers.lock);
> > -out:
> > -     ffa_to_smccc_res(res, ret);
> > +     ffa_to_smccc_res(res, 0);
> >  }
> >
> >  static u32 __ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges,
> > @@ -418,18 +414,20 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
> >       DECLARE_REG(u32, fraglen, ctxt, 3);
> >       DECLARE_REG(u32, endpoint_id, ctxt, 4);
> >       struct ffa_mem_region_addr_range *buf;
> > -     int ret = FFA_RET_INVALID_PARAMETERS;
> > +     int ret;
> >       u32 nr_ranges;
>
> nit: inverted christmas tree

Ack.

>
> >
> > -     if (fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)
> > -             goto out;
> > +     if (fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE ||
> > +         fraglen % sizeof(*buf)) {
>
> nit: I don't know if we wouldn't want extra parentheses here for readability.

Sure, will add parentheses around the second operand.

>
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> > +     }
> >
> > -     if (fraglen % sizeof(*buf))
> > -             goto out;
> > -
> > -     hyp_spin_lock(&host_buffers.lock);
> > -     if (!host_buffers.tx)
> > -             goto out_unlock;
> > +     guard(hyp_spinlock)(&host_buffers.lock);
> > +     if (!host_buffers.tx) {
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> > +     }
> >
> >       buf = hyp_buffers.tx;
> >       memcpy(buf, host_buffers.tx, fraglen);
> > @@ -444,19 +442,14 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
> >                */
> >               ffa_mem_reclaim(res, handle_lo, handle_hi, 0);
> >               WARN_ON(res->a0 != FFA_SUCCESS);
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, ret);
> > +             return;
> >       }
> >
> >       ffa_mem_frag_tx(res, handle_lo, handle_hi, fraglen, endpoint_id);
> >       if (res->a0 != FFA_SUCCESS && res->a0 != FFA_MEM_FRAG_RX)
> >               WARN_ON(ffa_host_unshare_ranges(buf, nr_ranges));
> >
> > -out_unlock:
> > -     hyp_spin_unlock(&host_buffers.lock);
> > -out:
> > -     if (ret)
> > -             ffa_to_smccc_res(res, ret);
> > -
> >       /*
> >        * If for any reason this did not succeed, we're in trouble as we have
> >        * now lost the content of the previous fragments and we can't rollback
> > @@ -465,7 +458,6 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
> >        * sharing/donating them again and may possibly lead to subsequent
> >        * failures, but this will not compromise confidentiality.
> >        */
> > -     return;
> >  }
> >
> >  static void __do_ffa_mem_xfer(const u64 func_id,
> > @@ -480,29 +472,29 @@ static void __do_ffa_mem_xfer(const u64 func_id,
> >       struct ffa_composite_mem_region *reg;
> >       struct ffa_mem_region *buf;
> >       u32 offset, nr_ranges, checked_offset;
> > -     int ret = 0;
> > +     int ret;
> >
> >       if (addr_mbz || npages_mbz || fraglen > len ||
> >           fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       if (fraglen < sizeof(struct ffa_mem_region) +
> >                     sizeof(struct ffa_mem_region_attributes)) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> > -     hyp_spin_lock(&host_buffers.lock);
> > +     guard(hyp_spinlock)(&host_buffers.lock);
> >       if (!host_buffers.tx) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       if (len > ffa_desc_buf.len) {
> > -             ret = FFA_RET_NO_MEMORY;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_NO_MEMORY);
> > +             return;
> >       }
> >
> >       buf = hyp_buffers.tx;
> > @@ -512,53 +504,41 @@ static void __do_ffa_mem_xfer(const u64 func_id,
> >                       ffa_mem_desc_offset(buf, 0, hyp_ffa_version);
> >       offset = ep_mem_access->composite_off;
> >       if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       if (fraglen < checked_offset) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       reg = (void *)buf + offset;
> >       nr_ranges = ((void *)buf + fraglen) - (void *)reg->constituents;
> >       if (nr_ranges % sizeof(reg->constituents[0])) {
> > -             ret = FFA_RET_INVALID_PARAMETERS;
> > -             goto out_unlock;
> > +             ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
> > +             return;
> >       }
> >
> >       nr_ranges /= sizeof(reg->constituents[0]);
> >       ret = ffa_host_share_ranges(reg->constituents, nr_ranges);
> > -     if (ret)
> > -             goto out_unlock;
> > +     if (ret) {
> > +             ffa_to_smccc_res(res, ret);
> > +             return;
> > +     }
> >
> >       ffa_mem_xfer(res, func_id, len, fraglen);
> >       if (fraglen != len) {
> > -             if (res->a0 != FFA_MEM_FRAG_RX)
> > -                     goto err_unshare;
> > -
> > -             if (res->a3 != fraglen)
> > -                     goto err_unshare;
> > +             if (res->a0 != FFA_MEM_FRAG_RX || res->a3 != fraglen)
> > +                     WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
> >       } else if (res->a0 != FFA_SUCCESS) {
> > -             goto err_unshare;
> > +             WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
>
> I am not sure this is really better for this function. At least we had a single
> callsite to this WARN_ON(ffa_host_unshare_ranges) ...
>
> Or alternatively if we really want guard() this can just set ret = XXX and then
>
>   if (ret)
>       WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
>
> So we can keep a single call site for the rollback.

Agreed, the single rollback callsite is better. I'll use a flag to
keep the original control flow readable.

>
> >       }
> > -
> > -out_unlock:
> > -     hyp_spin_unlock(&host_buffers.lock);
> > -out:
> > -     if (ret)
> > -             ffa_to_smccc_res(res, ret);
> > -     return;
> > -
> > -err_unshare:
> > -     WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
> > -     goto out_unlock;
> >  }
> >
>
> [...]
>
> >  int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
> > @@ -996,22 +975,19 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
> >       struct kvm *host_kvm;
> >       unsigned int idx;
> >       size_t vm_size;
> > -     int err;
> >
> > -     hyp_spin_lock(&vm_table_lock);
> > -     hyp_vm = get_pkvm_unref_hyp_vm_locked(handle);
> > -     if (!hyp_vm || !hyp_vm->kvm.arch.pkvm.is_dying) {
> > -             err = -EINVAL;
> > -             goto err_unlock;
> > +     scoped_guard(hyp_spinlock, &vm_table_lock) {
> > +             hyp_vm = get_pkvm_unref_hyp_vm_locked(handle);
> > +             if (!hyp_vm || !hyp_vm->kvm.arch.pkvm.is_dying)
> > +                     return -EINVAL;
> > +
> > +             host_kvm = hyp_vm->host_kvm;
> > +
> > +             /* Ensure the VMID is clean before it can be reallocated */
> > +             __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
> > +             remove_vm_table_entry(handle);
> >       }
> >
> > -     host_kvm = hyp_vm->host_kvm;
> > -
> > -     /* Ensure the VMID is clean before it can be reallocated */
> > -     __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
> > -     remove_vm_table_entry(handle);
> > -     hyp_spin_unlock(&vm_table_lock);
> > -
> >       /* Reclaim guest pages (including page-table pages) */
> >       mc = &host_kvm->arch.pkvm.teardown_mc;
> >       stage2_mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
> > @@ -1042,10 +1018,6 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
> >       teardown_donated_memory(mc, hyp_vm, vm_size);
> >       hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
> >       return 0;
> > -
> > -err_unlock:
> > -     hyp_spin_unlock(&vm_table_lock);
> > -     return err;
>
> For this one too I doubt this is really interesting: only one path uses
> err_unlock, and actually the entire label could just be removed to simply do
> hyp_spin_unlock(); return -EINVAL;
>
> This would avoid adding another tab with that scoped_guard(). But that's
> probably my aversion to scoped_guard() talking.

The scoped_guard makes the lock scope visually explicit here: the
lock must be dropped before the long teardown tail that follows, and
scoped_guard makes that boundary clear. It also prevents forgetting
the unlock on the early return. I think the extra tab is worth it for the
clarity it provides.

Thanks,
/fuad


>
> >  }
> >
> >  static u64 __pkvm_memshare_page_req(struct kvm_vcpu *vcpu, u64 ipa)
> > --
> > 2.54.0.1136.gdb2ca164c4-goog
> >


^ permalink raw reply

* Re: [PATCH v1 05/11] KVM: arm64: Make vcpu_{read,write}_sys_reg available to HYP code
From: Vincent Donnefort @ 2026-06-15 13:11 UTC (permalink / raw)
  To: tabba
  Cc: Marc Zyngier, Oliver Upton, Will Deacon, Catalin Marinas,
	Quentin Perret, Sebastian Ene, Per Larsen, Suzuki K Poulose,
	Zenghui Yu, Joey Gouly, Steffen Eiden, Mark Rutland,
	Jonathan Cameron, Hyunwoo Kim, linux-arm-kernel, kvmarm,
	linux-kernel
In-Reply-To: <20260612065925.755562-6-tabba@google.com>

On Fri, Jun 12, 2026 at 07:59:19AM +0100, tabba@google.com wrote:
> The vcpu_{read,write}_sys_reg() accessors are host-only, so helpers
> built on them such as kvm_vcpu_set_be()/kvm_vcpu_is_be() cannot be
> shared with hyp code. Add _vcpu_read_sys_reg()/_vcpu_write_sys_reg()
> inlines in kvm_emulate.h that dispatch on is_nvhe_hyp_code() to the
> host- or hyp-side accessor. A follow-up series uses this to share that
> emulation code at EL2.
> 
> No functional change intended.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  arch/arm64/include/asm/kvm_emulate.h | 22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 5bf3d7e1d92c..aed9fc0b717b 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -506,6 +506,22 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
>  	return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
>  }
>  
> +static inline u64 _vcpu_read_sys_reg(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)

I am not sure a single underscore is widely used in the kernel.

Why not move __vcpu_read_sys_reg() and __vcpu_write_sys_reg() from exception.c
to kvm_emulate.h? Especially since those helpers already check has_vhe().

> +{
> +	if (!is_nvhe_hyp_code())
> +		return vcpu_read_sys_reg(vcpu, reg);
> +
> +	return __vcpu_sys_reg(vcpu, reg);
> +}
> +
> +static inline void _vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg)
> +{
> +	if (!is_nvhe_hyp_code())
> +		vcpu_write_sys_reg(vcpu, val, reg);
> +	else
> +		__vcpu_assign_sys_reg(vcpu, reg, val);
> +}
> +
>  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  {
>  	if (vcpu_mode_is_32bit(vcpu)) {
> @@ -516,9 +532,9 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
>  
>  		r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
>  
> -		sctlr = vcpu_read_sys_reg(vcpu, r);
> +		sctlr = _vcpu_read_sys_reg(vcpu, r);
>  		sctlr |= SCTLR_ELx_EE;
> -		vcpu_write_sys_reg(vcpu, sctlr, r);
> +		_vcpu_write_sys_reg(vcpu, sctlr, r);
>  	}
>  }
>  
> @@ -533,7 +549,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
>  	r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
>  	bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
>  
> -	return vcpu_read_sys_reg(vcpu, r) & bit;
> +	return _vcpu_read_sys_reg(vcpu, r) & bit;
>  }
>  
>  static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
> -- 
> 2.54.0.1136.gdb2ca164c4-goog
> 


^ permalink raw reply

* [PATCH] KVM: arm64: nv: Write ESR_EL2 for injected nested SError exceptions
From: Fuad Tabba @ 2026-06-15 13:11 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton, Catalin Marinas, Will Deacon
  Cc: Joey Gouly, Suzuki K Poulose, Zenghui Yu, kvmarm,
	linux-arm-kernel, linux-kernel, tabba

kvm_inject_el2_exception() writes ESR_EL2 for synchronous exceptions
but not for SError. enter_exception64() does not write ESR_ELx for any
exception type, so the constructed syndrome is dropped. A guest L2
hypervisor taking a nested SError observes stale ESR_EL2.

This affects both kvm_inject_nested_serror() and the EASE path in
kvm_inject_nested_sea().

Write ESR_EL2 for except_type_serror, matching except_type_sync.

Fixes: 77ee70a07357 ("KVM: arm64: nv: Honor SError exception routing / masking")
Reported-by: sashiko <sashiko@sashiko.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/emulate-nested.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 22d497554c94..c2580d40197e 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2750,6 +2750,7 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
 		break;
 	case except_type_serror:
 		kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+		vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
 		break;
 	default:
 		WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
-- 
2.54.0.1136.gdb2ca164c4-goog



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox