Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v7 2/9] dmaengine: Add safe API to combine configuration and preparation
From: Frank.Li @ 2026-05-21 15:32 UTC (permalink / raw)
  To: Vinod Koul, Manivannan Sadhasivam, Krzysztof Wilczyński,
	Kishon Vijay Abraham I, Bjorn Helgaas, Christoph Hellwig,
	Sagi Grimberg, Chaitanya Kulkarni, Herbert Xu, David S. Miller,
	Nicolas Ferre, Alexandre Belloni, Claudiu Beznea, Koichiro Den,
	Niklas Cassel
  Cc: dmaengine, linux-kernel, linux-pci, linux-nvme, mhi,
	linux-arm-msm, linux-crypto, linux-arm-kernel, imx, Frank Li
In-Reply-To: <20260521-dma_prep_config-v7-0-1f73f4899883@nxp.com>

From: Frank Li <Frank.Li@nxp.com>

Introduce dmaengine_prep_config_single_safe() and
dmaengine_prep_config_sg_safe() to provide a reentrant-safe way to
combine slave configuration and transfer preparation.

Drivers may implement the new device_prep_config_sg() callback to perform
both steps atomically. If the callback is not provided, the helpers fall
back to calling dmaengine_slave_config() followed by
dmaengine_prep_slave_sg() under per-channel spinlock protection.

Tested-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
change in v6
- replace mutex with spinlock in commit message
- use spin_lock_irqsave()/spin_unlock_irqrestore() according to AI review results

"The documentation in struct dma_chan notes that *_prep() may be called
from a completion callback. Since completion callbacks often execute in
softirq or hardirq contexts, if a thread calls this function from
process context, local interrupts remain enabled.

If a DMA interrupt fires on the same CPU while the lock is held, the
completion callback could attempt to call this function again to queue
the next transfer, leading it to wait on the already-held chan->lock.

Does this fallback path need to use spin_lock_irqsave() and
spin_unlock_irqrestore() to safely disable interrupts?
"

change in v5
- remove redundant lock comments.
- use kernel-doc to describe the API

change in v4
- use spinlock() to protect config() and prep()

change in v3
- new patch
---
 drivers/dma/dmaengine.c   |  2 ++
 include/linux/dmaengine.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 405bd2fbb4a3b..ba29e60160c1a 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1099,6 +1099,8 @@ static int __dma_async_device_channel_register(struct dma_device *device,
 	chan->dev->device.parent = device->dev;
 	chan->dev->chan = chan;
 	chan->dev->dev_id = device->dev_id;
+	spin_lock_init(&chan->lock);
+
 	if (!name)
 		dev_set_name(&chan->dev->device, "dma%dchan%d", device->dev_id, chan->chan_id);
 	else
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index defa377d2ef54..6fe46c0c94527 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -322,6 +322,8 @@ struct dma_router {
  * @slave: ptr to the device using this channel
  * @cookie: last cookie value returned to client
  * @completed_cookie: last completed cookie for this channel
+ * @lock: serializes slave config and transfer preparation when the driver
+ *	  does not implement the device_prep_config_sg() callback.
  * @chan_id: channel ID for sysfs
  * @dev: class device for sysfs
  * @name: backlink name for sysfs
@@ -341,6 +343,12 @@ struct dma_chan {
 	dma_cookie_t cookie;
 	dma_cookie_t completed_cookie;
 
+	/*
+	 * Serialize config and prepare transfer. Must be a spinlock: *_prep()
+	 * may be called from a completion callback, which runs in atomic context.
+	 */
+	spinlock_t lock;
+
 	/* sysfs */
 	int chan_id;
 	struct dma_chan_dev *dev;
@@ -1068,6 +1076,84 @@ dmaengine_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	return dmaengine_prep_config_sg(chan, sgl, sg_len, dir, flags, NULL);
 }
 
+/**
+ * dmaengine_prep_config_sg_safe - prepare a scatter-gather DMA transfer
+ *                                 with atomic slave configuration update
+ * @chan: DMA channel
+ * @sgl: scatterlist for the transfer
+ * @sg_len: number of entries in @sgl
+ * @dir: DMA transfer direction
+ * @flags: transfer preparation flags
+ * @config: DMA slave configuration for this transfer
+ *
+ * Prepare a DMA scatter-gather transfer together with a corresponding slave
+ * configuration update in a re-entrant and race-safe manner.
+ *
+ * DMA engine drivers may implement the optional
+ * device_prep_config_sg() callback to perform both the slave configuration
+ * and descriptor preparation atomically. In this case, the operation is
+ * fully handled by the DMA engine driver.
+ *
+ * If the DMA engine driver does not implement device_prep_config_sg(), falls
+ * back to calling dmaengine_slave_config() followed by dmaengine_prep_slave_sg().
+ * The fallback path is protected by a per-channel spinlock to ensure that
+ * concurrent callers cannot interleave configuration and descriptor preparation
+ * on the same DMA channel.
+ *
+ * Return: Pointer to a prepared DMA async transaction descriptor on success,
+ * or %NULL if the transfer could not be prepared.
+ */
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_config_sg_safe(struct dma_chan *chan, struct scatterlist *sgl,
+			      unsigned int sg_len,
+			      enum dma_transfer_direction dir,
+			      unsigned long flags,
+			      struct dma_slave_config *config)
+{
+	struct dma_async_tx_descriptor *tx;
+	unsigned long spinlock_flags;
+
+	if (!chan || !chan->device)
+		return NULL;
+
+	if (!chan->device->device_prep_config_sg)
+		spin_lock_irqsave(&chan->lock, spinlock_flags);
+
+	tx = dmaengine_prep_config_sg(chan, sgl, sg_len, dir, flags, config);
+
+	if (!chan->device->device_prep_config_sg)
+		spin_unlock_irqrestore(&chan->lock, spinlock_flags);
+
+	return tx;
+}
+
+/**
+ * dmaengine_prep_config_single_safe - prepare a single-buffer DMA transfer
+ *                                     with atomic slave configuration update
+ * @chan: DMA channel
+ * @buf: DMA buffer address
+ * @len: length of the transfer in bytes
+ * @dir: DMA transfer direction
+ * @flags: transfer preparation flags
+ * @config: DMA slave configuration for this transfer
+ *
+ * See dmaengine_prep_config_sg_safe() for details.
+ */
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_config_single_safe(struct dma_chan *chan, dma_addr_t buf,
+				  size_t len, enum dma_transfer_direction dir,
+				  unsigned long flags,
+				  struct dma_slave_config *config)
+{
+	struct scatterlist sg;
+
+	sg_init_table(&sg, 1);
+	sg_dma_address(&sg) = buf;
+	sg_dma_len(&sg) = len;
+
+	return dmaengine_prep_config_sg_safe(chan, &sg, 1, dir, flags, config);
+}
+
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 struct rio_dma_ext;
 static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg(

-- 
2.43.0



^ permalink raw reply related

* [PATCH v7 1/9] dmaengine: Add API to combine configuration and preparation (sg and single)
From: Frank.Li @ 2026-05-21 15:32 UTC (permalink / raw)
  To: Vinod Koul, Manivannan Sadhasivam, Krzysztof Wilczyński,
	Kishon Vijay Abraham I, Bjorn Helgaas, Christoph Hellwig,
	Sagi Grimberg, Chaitanya Kulkarni, Herbert Xu, David S. Miller,
	Nicolas Ferre, Alexandre Belloni, Claudiu Beznea, Koichiro Den,
	Niklas Cassel
  Cc: dmaengine, linux-kernel, linux-pci, linux-nvme, mhi,
	linux-arm-msm, linux-crypto, linux-arm-kernel, imx, Frank Li
In-Reply-To: <20260521-dma_prep_config-v7-0-1f73f4899883@nxp.com>

From: Frank Li <Frank.Li@nxp.com>

Previously, configuration and preparation required two separate calls. This
works well when configuration is done only once during initialization.

However, in cases where the burst length or source/destination address must
be adjusted for each transfer, calling two functions is verbose and
requires additional locking to ensure both steps complete atomically.

Add a new API dmaengine_prep_config_single() and dmaengine_prep_config_sg()
and callback device_prep_config_sg() that combines configuration and
preparation into a single operation. If the configuration argument is
passed as NULL, fall back to the existing implementation.

Tested-by: Niklas Cassel <cassel@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
change in v4
- drop context in device_prep_config_sg()

change in v3
- remove Deprecated for callback device_prep_slave_sg().
- Move condition check before sg init.
- split function at return type.
- move safe version to next patch

change in v2
- add () for function
- use the short name device_prep_sg(), removing "slave" and "config". The
'slave' is redundant; once it is removed, the function name differs from the
existing one, so the _config suffix is no longer needed and is removed too.
---
 Documentation/driver-api/dmaengine/client.rst |  9 ++++
 include/linux/dmaengine.h                     | 63 +++++++++++++++++++++++----
 2 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst
index d491e385d61a9..5ee5d4a3596dd 100644
--- a/Documentation/driver-api/dmaengine/client.rst
+++ b/Documentation/driver-api/dmaengine/client.rst
@@ -80,6 +80,10 @@ The details of these operations are:
 
   - slave_sg: DMA a list of scatter gather buffers from/to a peripheral
 
+  - config_sg: Similar to slave_sg, but also passes down a dma_slave_config
+    struct, so dmaengine_slave_config() need not be called each time the
+    burst length or the FIFO address has to be adjusted.
+
   - peripheral_dma_vec: DMA an array of scatter gather buffers from/to a
     peripheral. Similar to slave_sg, but uses an array of dma_vec
     structures instead of a scatterlist.
@@ -106,6 +110,11 @@ The details of these operations are:
 		unsigned int sg_len, enum dma_data_direction direction,
 		unsigned long flags);
 
+     struct dma_async_tx_descriptor *dmaengine_prep_config_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction dir,
+		unsigned long flags, struct dma_slave_config *config);
+
      struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec(
 		struct dma_chan *chan, const struct dma_vec *vecs,
 		size_t nents, enum dma_data_direction direction,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index b3d251c9734e9..defa377d2ef54 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -835,6 +835,7 @@ struct dma_filter {
  *	where the address and size of each segment is located in one entry of
  *	the dma_vec array.
  * @device_prep_slave_sg: prepares a slave dma operation
+ * @device_prep_config_sg: prepares a slave DMA operation with dma_slave_config
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
@@ -934,6 +935,10 @@ struct dma_device {
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags, void *context);
+	struct dma_async_tx_descriptor *(*device_prep_config_sg)(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, struct dma_slave_config *config);
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
@@ -974,22 +979,44 @@ static inline bool is_slave_direction(enum dma_transfer_direction direction)
 	       (direction == DMA_DEV_TO_DEV);
 }
 
-static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
-	struct dma_chan *chan, dma_addr_t buf, size_t len,
-	enum dma_transfer_direction dir, unsigned long flags)
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_config_single(struct dma_chan *chan, dma_addr_t buf, size_t len,
+			     enum dma_transfer_direction dir,
+			     unsigned long flags,
+			     struct dma_slave_config *config)
 {
 	struct scatterlist sg;
+
+	if (!chan || !chan->device)
+		return NULL;
+
 	sg_init_table(&sg, 1);
 	sg_dma_address(&sg) = buf;
 	sg_dma_len(&sg) = len;
 
-	if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
+	if (chan->device->device_prep_config_sg)
+		return chan->device->device_prep_config_sg(chan, &sg, 1, dir,
+							   flags, config);
+
+	if (config)
+		if (dmaengine_slave_config(chan, config))
+			return NULL;
+
+	if (!chan->device->device_prep_slave_sg)
 		return NULL;
 
 	return chan->device->device_prep_slave_sg(chan, &sg, 1,
 						  dir, flags, NULL);
 }
 
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_slave_single(struct dma_chan *chan, dma_addr_t buf, size_t len,
+			    enum dma_transfer_direction dir,
+			    unsigned long flags)
+{
+	return dmaengine_prep_config_single(chan, buf, len, dir, flags, NULL);
+}
+
 /**
  * dmaengine_prep_peripheral_dma_vec() - Prepare a DMA scatter-gather descriptor
  * @chan: The channel to be used for this descriptor
@@ -1010,17 +1037,37 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec(
 							    dir, flags);
 }
 
-static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
-	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
-	enum dma_transfer_direction dir, unsigned long flags)
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_config_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
+			 unsigned long flags, struct dma_slave_config *config)
 {
-	if (!chan || !chan->device || !chan->device->device_prep_slave_sg)
+	if (!chan || !chan->device)
+		return NULL;
+
+	if (chan->device->device_prep_config_sg)
+		return chan->device->device_prep_config_sg(chan, sgl, sg_len,
+				dir, flags, config);
+
+	if (config)
+		if (dmaengine_slave_config(chan, config))
+			return NULL;
+
+	if (!chan->device->device_prep_slave_sg)
 		return NULL;
 
 	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
 						  dir, flags, NULL);
 }
 
+static inline struct dma_async_tx_descriptor *
+dmaengine_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction dir,
+			unsigned long flags)
+{
+	return dmaengine_prep_config_sg(chan, sgl, sg_len, dir, flags, NULL);
+}
+
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 struct rio_dma_ext;
 static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg(

-- 
2.43.0



^ permalink raw reply related

* [PATCH v7 0/9] dmaengine: Add new API to combine configuration and descriptor preparation
From: Frank.Li @ 2026-05-21 15:32 UTC (permalink / raw)
  To: Vinod Koul, Manivannan Sadhasivam, Krzysztof Wilczyński,
	Kishon Vijay Abraham I, Bjorn Helgaas, Christoph Hellwig,
	Sagi Grimberg, Chaitanya Kulkarni, Herbert Xu, David S. Miller,
	Nicolas Ferre, Alexandre Belloni, Claudiu Beznea, Koichiro Den,
	Niklas Cassel
  Cc: dmaengine, linux-kernel, linux-pci, linux-nvme, mhi,
	linux-arm-msm, linux-crypto, linux-arm-kernel, imx, Frank Li,
	Damien Le Moal

Previously, configuration and preparation required two separate calls. This
works well when configuration is done only once during initialization.

However, in cases where the burst length or source/destination address must
be adjusted for each transfer, calling two functions is verbose.

	if (dmaengine_slave_config(chan, &sconf)) {
		dev_err(dev, "DMA slave config fail\n");
		return -EIO;
	}

	tx = dmaengine_prep_slave_single(chan, dma_local, len, dir, flags);

After the new API is added:

	tx = dmaengine_prep_config_single(chan, dma_local, len, dir, flags, &sconf);

Additionally, the previous two calls require additional locking to ensure
both steps complete atomically.

    mutex_lock()
    dmaengine_slave_config()
    dmaengine_prep_slave_single()
    mutex_unlock()

After the new API is added, the mutex lock can be removed. See patch
     nvmet: pci-epf: Use dmaengine_prep_config_single_safe() API

Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
Changes in v7:
- Remove dma_(rx|tx)_lock() in nvmet totally. (sashia AI)
- Link to v6: https://patch.msgid.link/20260520-dma_prep_config-v6-0-06e49b7acb38@nxp.com

Changes in v6:
- Fix problems reported by sashaki AI; see each patch's change log for details
- Link to v5: https://lore.kernel.org/r/20260512-dma_prep_config-v5-0-26865bf7d935@nxp.com

Changes in v5:
- collect Mani's reviewed-by tags
- use kernel doc for new APIs.
- Link to v4: https://lore.kernel.org/r/20260506-dma_prep_config-v4-0-85b3d22babff@nxp.com

Changes in v4:
- remove void* context in config_prep() callback
- use spin lock to protect config() and prep().
- Link to v3: https://lore.kernel.org/r/20260105-dma_prep_config-v3-0-a8480362fd42@nxp.com

Changes in v3:
- collect review tags
- create safe version in framework
- Link to v2: https://lore.kernel.org/r/20251218-dma_prep_config-v2-0-c07079836128@nxp.com

Changes in v2:
- Use name dmaengine_prep_config_single() and dmaengine_prep_config_sg()
- Add _safe version to avoid confusion; it does not need an additional mutex.
- Update documentation.
- Update commit message. Add () for function names. Use uppercase for subject.
- Add more explanation for removing the lock.
- Link to v1: https://lore.kernel.org/r/20251208-dma_prep_config-v1-0-53490c5e1e2a@nxp.com

---
Frank Li (9):
      dmaengine: Add API to combine configuration and preparation (sg and single)
      dmaengine: Add safe API to combine configuration and preparation
      PCI: endpoint: pci-epf-test: Use dmaenigne_prep_config_single() to simplify code
      dmaengine: dw-edma: Use new .device_prep_config_sg() callback
      dmaengine: dw-edma: Pass dma_slave_config to dw_edma_device_transfer()
      nvmet: pci-epf: Remove unnecessary dmaengine_terminate_sync() on each DMA transfer
      nvmet: pci-epf: Use dmaengine_prep_config_single_safe() API
      PCI: epf-mhi: Use dmaengine_prep_config_single() to simplify code
      crypto: atmel: Use dmaengine_prep_config_sg() API

 Documentation/driver-api/dmaengine/client.rst |   9 ++
 drivers/crypto/atmel-aes.c                    |  10 +-
 drivers/dma/dmaengine.c                       |   2 +
 drivers/dma/dw-edma/dw-edma-core.c            |  41 +++++--
 drivers/nvme/target/pci-epf.c                 |  33 +-----
 drivers/pci/endpoint/functions/pci-epf-mhi.c  |  52 +++------
 drivers/pci/endpoint/functions/pci-epf-test.c |   8 +-
 include/linux/dmaengine.h                     | 149 ++++++++++++++++++++++++--
 8 files changed, 208 insertions(+), 96 deletions(-)
---
base-commit: 254f49634ee16a731174d2ae34bc50bd5f45e731
change-id: 20251204-dma_prep_config-654170d245a2

Best regards,
--  
Frank Li <Frank.Li@nxp.com>



^ permalink raw reply

* Re: [PATCH] arm64: defconfig: Drop non-existing (yet) PCI_SKY1_HOST
From: Krzysztof Kozlowski @ 2026-05-21 15:31 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, linux-arm-kernel, soc, linux-kernel
  Cc: Peter Chen
In-Reply-To: <b83ef885-3d29-4e46-917f-26bb8490fe35@oss.qualcomm.com>

On 21/05/2026 17:25, Krzysztof Kozlowski wrote:
> On 21/05/2026 17:21, Krzysztof Kozlowski wrote:
>> The source patch for commit f54f7979ff88 ("arm64: defconfig: Move
>> entries to match savedefconfig") included reorganizing around
>> PCI_SKY1_HOST, which is only in next at that time and was not merged to
>> soc tree.  Applying that patch caused some conflicts which were not
>> really resolved correctly and PCI_SKY1_HOST was added.
>>
>> Cc: Peter Chen <peter.chen@cixtech.com>
>> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
>>
>> ---
>>
>> This should go to soc/defconfig branch.
>>
>> The PCI_SKY1_HOST will come later with:
>> https://lore.kernel.org/all/20260327114628.3800886-1-peter.chen@cixtech.com/
>> ---
>>  arch/arm64/configs/defconfig | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
>> index 094bb9cd8764..4ed684efbbba 100644
>> --- a/arch/arm64/configs/defconfig
>> +++ b/arch/arm64/configs/defconfig
>> @@ -232,7 +232,6 @@ CONFIG_PCIE_XILINX=y
>>  CONFIG_PCIE_XILINX_DMA_PL=y
>>  CONFIG_PCIE_XILINX_NWL=y
>>  CONFIG_PCIE_XILINX_CPM=y
>> -CONFIG_PCI_SKY1_HOST=m
>>  CONFIG_PCI_J721E_HOST=m
>>  CONFIG_PCI_IMX6_HOST=y
> 
> Heh, I see the Cix patch was already merged, so this is wrong :/

I sent correct fix in:
https://lore.kernel.org/r/20260521153003.429610-2-krzysztof.kozlowski@oss.qualcomm.com/

Best regards,
Krzysztof


^ permalink raw reply

* [PATCH] arm64: defconfig: Fixup duplicated PCI_SKY1_HOST
From: Krzysztof Kozlowski @ 2026-05-21 15:30 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, linux-arm-kernel, soc, linux-kernel
  Cc: Krzysztof Kozlowski, Peter Chen

Commit 246e37739f24 ("arm64: defconfig: Enable CIX Sky1 pinctrl, PCIe
host, and Cadence GPIO") placed PCI_SKY1_HOST in wrong spot, thus it got
duplicated when merging with  commit f54f7979ff88 ("arm64: defconfig:
Move entries to match savedefconfig").

Cc: Peter Chen <peter.chen@cixtech.com>
Fixes: 1440d446ad5d ("Merge tag 'cix-defconfig-v7.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/cix into soc/defconfig")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

---

https://krzk.eu/#/builders/102/builds/72/steps/10/logs/warnings__1_
---
 arch/arm64/configs/defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 340313d48e0f..03a057f90527 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -234,7 +234,6 @@ CONFIG_PCIE_XILINX_NWL=y
 CONFIG_PCIE_XILINX_CPM=y
 CONFIG_PCI_SKY1_HOST=m
 CONFIG_PCI_J721E_HOST=m
-CONFIG_PCI_SKY1_HOST=m
 CONFIG_PCI_IMX6_HOST=y
 CONFIG_PCI_LAYERSCAPE=y
 CONFIG_PCI_HISI=y
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH] Bluetooth: btmtk: remove extra copy in cmd array init
From: patchwork-bot+bluetooth @ 2026-05-21 15:30 UTC (permalink / raw)
  To: Jiajia Liu
  Cc: marcel, luiz.dentz, matthias.bgg, angelogioacchino.delregno,
	linux-bluetooth, linux-kernel, linux-arm-kernel, linux-mediatek
In-Reply-To: <20260520021500.13504-1-liujiajia@kylinos.cn>

Hello:

This patch was applied to bluetooth/bluetooth-next.git (master)
by Luiz Augusto von Dentz <luiz.von.dentz@intel.com>:

On Wed, 20 May 2026 10:15:00 +0800 you wrote:
> In btmtk_setup_firmware_79xx, the data length indicated by wmt_params.dlen
> in the cmd buffer is MTK_SEC_MAP_NEED_SEND_SIZE + 1. Except for the first
> byte, the remaining length is MTK_SEC_MAP_NEED_SEND_SIZE. memcpy copied one
> more byte to cmd + 1 than the remaining length. Align the length passed to
> memcpy to avoid exceeding current section map.
> 
> Signed-off-by: Jiajia Liu <liujiajia@kylinos.cn>
> 
> [...]

Here is the summary with links:
  - Bluetooth: btmtk: remove extra copy in cmd array init
    https://git.kernel.org/bluetooth/bluetooth-next/c/b3e1ce138148

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply

* Re: [PATCH net] net: airoha: Disable GDM2 forwarding before configuring GDM2 loopback
From: patchwork-bot+netdevbpf @ 2026-05-21 15:30 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, linux-arm-kernel,
	linux-mediatek, netdev, madhur.agrawal
In-Reply-To: <20260520-airoha-disable-gdm2-fwd-v1-1-1eeea5dffc2f@kernel.org>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 20 May 2026 15:12:02 +0200 you wrote:
> Hw design requires to disable GDM2 forwarding before configuring GDM2
> loopback in airoha_set_gdm2_loopback routine.
> 
> Fixes: 9cd451d414f6e ("net: airoha: Add loopback support for GDM2")
> Tested-by: Madhur Agrawal <madhur.agrawal@airoha.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> 
> [...]

Here is the summary with links:
  - [net] net: airoha: Disable GDM2 forwarding before configuring GDM2 loopback
    https://git.kernel.org/netdev/net/c/985d4a55e64e

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply

* Re: [PATCH] arm64: defconfig: Drop non-existing (yet) PCI_SKY1_HOST
From: Krzysztof Kozlowski @ 2026-05-21 15:25 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, linux-arm-kernel, soc, linux-kernel
  Cc: Peter Chen
In-Reply-To: <20260521152158.425895-2-krzysztof.kozlowski@oss.qualcomm.com>

On 21/05/2026 17:21, Krzysztof Kozlowski wrote:
> The source patch for commit f54f7979ff88 ("arm64: defconfig: Move
> entries to match savedefconfig") included reorganizing around
> PCI_SKY1_HOST, which is only in next at that time and was not merged to
> soc tree.  Applying that patch caused some conflicts which were not
> really resolved correctly and PCI_SKY1_HOST was added.
> 
> Cc: Peter Chen <peter.chen@cixtech.com>
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
> 
> ---
> 
> This should go to soc/defconfig branch.
> 
> The PCI_SKY1_HOST will come later with:
> https://lore.kernel.org/all/20260327114628.3800886-1-peter.chen@cixtech.com/
> ---
>  arch/arm64/configs/defconfig | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index 094bb9cd8764..4ed684efbbba 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -232,7 +232,6 @@ CONFIG_PCIE_XILINX=y
>  CONFIG_PCIE_XILINX_DMA_PL=y
>  CONFIG_PCIE_XILINX_NWL=y
>  CONFIG_PCIE_XILINX_CPM=y
> -CONFIG_PCI_SKY1_HOST=m
>  CONFIG_PCI_J721E_HOST=m
>  CONFIG_PCI_IMX6_HOST=y

Heh, I see the Cix patch was already merged, so this is wrong :/

Best regards,
Krzysztof


^ permalink raw reply

* Re: [PATCH v3 1/1] arm64: defconfig: Enable CIX Sky1 pinctrl, PCIe host, and Cadence GPIO
From: Krzysztof Kozlowski @ 2026-05-21 15:23 UTC (permalink / raw)
  To: Peter Chen, arnd
  Cc: krzysztof.kozlowski, geert+renesas, linux-kernel,
	linux-arm-kernel, cix-kernel-upstream, Yunseong Kim
In-Reply-To: <20260327114628.3800886-1-peter.chen@cixtech.com>

On 27/03/2026 12:46, Peter Chen wrote:
> Enable the CIX Sky1 pinctrl driver (PINCTRL_SKY1), CIX Sky1 PCIe host
> controller (PCI_SKY1_HOST), and Cadence GPIO controller (GPIO_CADENCE)
> for the Radxa Orion O6 board which uses the CIX Sky1 SoC.
> 
> The pinctrl driver is a dependency for other on-SoC peripherals. The
> Cadence-based PCIe host controller enables use of PCIe peripherals on
> the board. The Cadence GPIO controller provides GPIO support for the
> SoC.
> 
> Cc: Yunseong Kim <ysk@kzalloc.com>
> Signed-off-by: Peter Chen <peter.chen@cixtech.com>
> ---
> Changes for v3:
> - Use specific driver names (CIX Sky1 pinctrl, CIX Sky1 PCIe host
>   controller, Cadence GPIO) in subject and commit message instead of
>   generic terms.
> - Remove external Debian bug reference; explain rationale directly.
> - Remove NVMe mention since only PCIe host controller is enabled.
> 
> Changes for v2:
> - Delete CIX HDA configurations due to it is not used at current
>   Orion O6 board device tree.
> 
>  arch/arm64/configs/defconfig | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index b67d5b1fc45b..f9be52484008 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -241,6 +241,7 @@ CONFIG_PCIE_XILINX_DMA_PL=y
>  CONFIG_PCIE_XILINX_NWL=y
>  CONFIG_PCIE_XILINX_CPM=y
>  CONFIG_PCI_J721E_HOST=m
> +CONFIG_PCI_SKY1_HOST=m

This is not correctly placed and caused issues later - conflicts with my
cleanup patch.

Please fix it up before you send the patch to soc@.

Best regards,
Krzysztof


^ permalink raw reply

* [PATCH] arm64: defconfig: Drop non-existing (yet) PCI_SKY1_HOST
From: Krzysztof Kozlowski @ 2026-05-21 15:21 UTC (permalink / raw)
  To: Arnd Bergmann, Krzysztof Kozlowski, Alexandre Belloni,
	Linus Walleij, Drew Fustini, linux-arm-kernel, soc, linux-kernel
  Cc: Krzysztof Kozlowski, Peter Chen

The source patch for commit f54f7979ff88 ("arm64: defconfig: Move
entries to match savedefconfig") included reorganizing around
PCI_SKY1_HOST, which is only in next at that time and was not merged to
soc tree.  Applying that patch caused some conflicts which were not
really resolved correctly and PCI_SKY1_HOST was added.

Cc: Peter Chen <peter.chen@cixtech.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

---

This should go to soc/defconfig branch.

The PCI_SKY1_HOST will come later with:
https://lore.kernel.org/all/20260327114628.3800886-1-peter.chen@cixtech.com/
---
 arch/arm64/configs/defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 094bb9cd8764..4ed684efbbba 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -232,7 +232,6 @@ CONFIG_PCIE_XILINX=y
 CONFIG_PCIE_XILINX_DMA_PL=y
 CONFIG_PCIE_XILINX_NWL=y
 CONFIG_PCIE_XILINX_CPM=y
-CONFIG_PCI_SKY1_HOST=m
 CONFIG_PCI_J721E_HOST=m
 CONFIG_PCI_IMX6_HOST=y
 CONFIG_PCI_LAYERSCAPE=y
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH v10 19/30] KVM: arm64: Provide assembly for SME register access
From: Mark Brown @ 2026-05-21 15:17 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Oliver Upton, Marc Zyngier, Joey Gouly, Catalin Marinas,
	Suzuki K Poulose, Will Deacon, Paolo Bonzini, Jonathan Corbet,
	Shuah Khan, Dave Martin, Fuad Tabba, Ben Horgan, linux-arm-kernel,
	kvmarm, linux-kernel, kvm, linux-doc, linux-kselftest,
	Peter Maydell, Eric Auger
In-Reply-To: <ag8b7oq4SFpdmlP_@J2N7QTR9R3>

[-- Attachment #1: Type: text/plain, Size: 799 bytes --]

On Thu, May 21, 2026 at 03:51:26PM +0100, Mark Rutland wrote:

> While this specific instance is simple enough, I don't think we should
> continue to duplicate the low level save/restore routines between the
> main kernel and KVM hyp code.

> I've sent a series that avoids the need for this, and cleans up some
> other bits:

>   https://lore.kernel.org/linux-arm-kernel/20260521132556.584676-1-mark.rutland@arm.com/

> Assuming Marc and Oliver are on board, I'd prefer that we do that
> cleanup first, and build the KVM SME support atop.

Yeah, I've got a laundry list of things that I want to improve with both
the main kernel and KVM but the latency on getting anything reviewed
with both sides and sometimes obscure implementation decisions means
I've been waiting until this is landed first.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH v7 19/28] media: rockchip: rga: change offset to dma_addresses
From: Michael Tretter @ 2026-05-21 15:16 UTC (permalink / raw)
  To: Sven Püschel
  Cc: Jacob Chen, Ezequiel Garcia, Mauro Carvalho Chehab,
	Heiko Stuebner, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Hans Verkuil, linux-media, linux-rockchip, linux-arm-kernel,
	linux-kernel, devicetree, kernel, nicolas, sebastian.reichel,
	p.zabel, Nicolas Dufresne
In-Reply-To: <20260521-spu-rga3-v7-19-3f33e8c7145f@pengutronix.de>

On Thu, 21 May 2026 00:44:24 +0200, Sven Püschel wrote:
> Change the offset to dma_addresses, as the current naming is misleading.
> The offset naming comes from the fact that it references the offset in
> the mapped iommu address space. But from the hardware point of view this
> is an address, as also pointed out by the register naming
> (e.g. RGA_DST_Y_RGB_BASE_ADDR). Therefore also change the type to
> dma_addr_t, as with an external iommu driver this would also be the
> correct type.
> 
> This change is a preparation for the RGA3 support, which uses an external
> iommu and therefore just gets a dma_addr_t for each buffer. The field
> renaming allows reusing the existing fields of rga_vb_buffer to store
> these values.
> 
> Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
> Signed-off-by: Sven Püschel <s.pueschel@pengutronix.de>
> 
> ---
> Changes in v6:
> - Also changed dma_addrs variable type to dma_addr_t. Flagged by Sashiko
>   https://sashiko.dev/#/patchset/20260428-spu-rga3-v5-0-eb7f5d019d86%40pengutronix.de?part=20
> ---
>  drivers/media/platform/rockchip/rga/rga-buf.c |  12 +--
>  drivers/media/platform/rockchip/rga/rga-hw.c  | 105 +++++++++++++-------------
>  drivers/media/platform/rockchip/rga/rga.h     |  12 +--
>  3 files changed, 64 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/media/platform/rockchip/rga/rga-buf.c b/drivers/media/platform/rockchip/rga/rga-buf.c
> index dcaba66f5c1fc..ab9554c1c4cd9 100644
> --- a/drivers/media/platform/rockchip/rga/rga-buf.c
> +++ b/drivers/media/platform/rockchip/rga/rga-buf.c
> @@ -121,7 +121,7 @@ static int rga_buf_prepare(struct vb2_buffer *vb)
>  	size_t curr_desc = 0;
>  	int i;
>  	const struct v4l2_format_info *info;
> -	unsigned int offsets[VIDEO_MAX_PLANES];
> +	dma_addr_t dma_addrs[VIDEO_MAX_PLANES];
>  
>  	if (IS_ERR(f))
>  		return PTR_ERR(f);
> @@ -145,18 +145,18 @@ static int rga_buf_prepare(struct vb2_buffer *vb)
>  				 "Failed to map video buffer to RGA\n");
>  			return n_desc;
>  		}
> -		offsets[i] = curr_desc << PAGE_SHIFT;
> +		dma_addrs[i] = curr_desc << PAGE_SHIFT;
>  		curr_desc += n_desc;
>  	}
>  
>  	/* Fill the remaining planes */
>  	info = v4l2_format_info(f->fmt->fourcc);
>  	for (i = info->mem_planes; i < info->comp_planes; i++)
> -		offsets[i] = get_plane_offset(f, info, i);
> +		dma_addrs[i] = dma_addrs[0] + get_plane_offset(f, info, i);

dma_addrs[0] looks suspicious, but correct, because mem_planes is either
1 or equal to comp_planes and therefore, the "remaining planes" are
always all planes except for the first plane, which happens to be
dma_addrs[0]. Maybe, dma_addrs[info->mem_planes - 1] would be more
correct, but also even more confusing.

Reviewed-by: Michael Tretter <m.tretter@pengutronix.de>

>  
> -	rbuf->offset.y_off = offsets[0];
> -	rbuf->offset.u_off = offsets[1];
> -	rbuf->offset.v_off = offsets[2];
> +	rbuf->dma_addrs.y_addr = dma_addrs[0];
> +	rbuf->dma_addrs.u_addr = dma_addrs[1];
> +	rbuf->dma_addrs.v_addr = dma_addrs[2];
>  
>  	return 0;
>  }
> diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c
> index 43fd023b7571c..99cf57d5ba89d 100644
> --- a/drivers/media/platform/rockchip/rga/rga-hw.c
> +++ b/drivers/media/platform/rockchip/rga/rga-hw.c
> @@ -16,11 +16,11 @@ enum e_rga_start_pos {
>  	RB = 3,
>  };
>  
> -struct rga_corners_addr_offset {
> -	struct rga_addr_offset left_top;
> -	struct rga_addr_offset right_top;
> -	struct rga_addr_offset left_bottom;
> -	struct rga_addr_offset right_bottom;
> +struct rga_corners_addrs {
> +	struct rga_addrs left_top;
> +	struct rga_addrs right_top;
> +	struct rga_addrs left_bottom;
> +	struct rga_addrs right_bottom;
>  };
>  
>  static unsigned int rga_get_scaling(unsigned int src, unsigned int dst)
> @@ -36,20 +36,20 @@ static unsigned int rga_get_scaling(unsigned int src, unsigned int dst)
>  	return (src > dst) ? ((dst << 16) / src) : ((src << 16) / dst);
>  }
>  
> -static struct rga_corners_addr_offset
> -rga_get_addr_offset(struct rga_frame *frm, struct rga_addr_offset *offset,
> -		    unsigned int x, unsigned int y, unsigned int w, unsigned int h)
> +static struct rga_corners_addrs
> +rga_get_corner_addrs(struct rga_frame *frm, struct rga_addrs *addrs,
> +		     unsigned int x, unsigned int y, unsigned int w, unsigned int h)
>  {
> -	struct rga_corners_addr_offset offsets;
> -	struct rga_addr_offset *lt, *lb, *rt, *rb;
> +	struct rga_corners_addrs corner_addrs;
> +	struct rga_addrs *lt, *lb, *rt, *rb;
>  	const struct v4l2_format_info *format_info;
>  	unsigned int x_div = 0,
>  		     y_div = 0, uv_stride = 0, pixel_width = 0;
>  
> -	lt = &offsets.left_top;
> -	lb = &offsets.left_bottom;
> -	rt = &offsets.right_top;
> -	rb = &offsets.right_bottom;
> +	lt = &corner_addrs.left_top;
> +	lb = &corner_addrs.left_bottom;
> +	rt = &corner_addrs.right_top;
> +	rb = &corner_addrs.right_bottom;
>  
>  	format_info = v4l2_format_info(frm->pix.pixelformat);
>  	/* x_div is only used for the u/v planes.
> @@ -64,29 +64,28 @@ rga_get_addr_offset(struct rga_frame *frm, struct rga_addr_offset *offset,
>  	uv_stride = frm->stride / x_div;
>  	pixel_width = frm->stride / frm->pix.width;
>  
> -	lt->y_off = offset->y_off + y * frm->stride + x * pixel_width;
> -	lt->u_off = offset->u_off + (y / y_div) * uv_stride + x / x_div;
> -	lt->v_off = offset->v_off + (y / y_div) * uv_stride + x / x_div;
> +	lt->y_addr = addrs->y_addr + y * frm->stride + x * pixel_width;
> +	lt->u_addr = addrs->u_addr + (y / y_div) * uv_stride + x / x_div;
> +	lt->v_addr = addrs->v_addr + (y / y_div) * uv_stride + x / x_div;
>  
> -	lb->y_off = lt->y_off + (h - 1) * frm->stride;
> -	lb->u_off = lt->u_off + (h / y_div - 1) * uv_stride;
> -	lb->v_off = lt->v_off + (h / y_div - 1) * uv_stride;
> +	lb->y_addr = lt->y_addr + (h - 1) * frm->stride;
> +	lb->u_addr = lt->u_addr + (h / y_div - 1) * uv_stride;
> +	lb->v_addr = lt->v_addr + (h / y_div - 1) * uv_stride;
>  
> -	rt->y_off = lt->y_off + (w - 1) * pixel_width;
> -	rt->u_off = lt->u_off + w / x_div - 1;
> -	rt->v_off = lt->v_off + w / x_div - 1;
> +	rt->y_addr = lt->y_addr + (w - 1) * pixel_width;
> +	rt->u_addr = lt->u_addr + w / x_div - 1;
> +	rt->v_addr = lt->v_addr + w / x_div - 1;
>  
> -	rb->y_off = lb->y_off + (w - 1) * pixel_width;
> -	rb->u_off = lb->u_off + w / x_div - 1;
> -	rb->v_off = lb->v_off + w / x_div - 1;
> +	rb->y_addr = lb->y_addr + (w - 1) * pixel_width;
> +	rb->u_addr = lb->u_addr + w / x_div - 1;
> +	rb->v_addr = lb->v_addr + w / x_div - 1;
>  
> -	return offsets;
> +	return corner_addrs;
>  }
>  
> -static struct rga_addr_offset *rga_lookup_draw_pos(struct
> -		rga_corners_addr_offset
> -		* offsets, u32 rotate_mode,
> -		u32 mirr_mode)
> +static struct rga_addrs *rga_lookup_draw_pos(struct rga_corners_addrs *corner_addrs,
> +					     u32 rotate_mode,
> +					     u32 mirr_mode)
>  {
>  	static enum e_rga_start_pos rot_mir_point_matrix[4][4] = {
>  		{
> @@ -103,18 +102,18 @@ static struct rga_addr_offset *rga_lookup_draw_pos(struct
>  		},
>  	};
>  
> -	if (!offsets)
> +	if (!corner_addrs)
>  		return NULL;
>  
>  	switch (rot_mir_point_matrix[rotate_mode][mirr_mode]) {
>  	case LT:
> -		return &offsets->left_top;
> +		return &corner_addrs->left_top;
>  	case LB:
> -		return &offsets->left_bottom;
> +		return &corner_addrs->left_bottom;
>  	case RT:
> -		return &offsets->right_top;
> +		return &corner_addrs->right_top;
>  	case RB:
> -		return &offsets->right_bottom;
> +		return &corner_addrs->right_bottom;
>  	}
>  
>  	return NULL;
> @@ -316,9 +315,9 @@ static void rga_cmd_set_trans_info(struct rga_ctx *ctx)
>  }
>  
>  static void rga_cmd_set_src_info(struct rga_ctx *ctx,
> -				 struct rga_addr_offset *offset)
> +				 struct rga_addrs *addrs)
>  {
> -	struct rga_corners_addr_offset src_offsets;
> +	struct rga_corners_addrs src_corner_addrs;
>  	u32 *dest = ctx->cmdbuf_virt;
>  	unsigned int src_h, src_w, src_x, src_y;
>  
> @@ -330,22 +329,22 @@ static void rga_cmd_set_src_info(struct rga_ctx *ctx,
>  	/*
>  	 * Calculate the source framebuffer base address with offset pixel.
>  	 */
> -	src_offsets = rga_get_addr_offset(&ctx->in, offset,
> -					  src_x, src_y, src_w, src_h);
> +	src_corner_addrs = rga_get_corner_addrs(&ctx->in, addrs,
> +						src_x, src_y, src_w, src_h);
>  
>  	dest[(RGA_SRC_Y_RGB_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		src_offsets.left_top.y_off;
> +		src_corner_addrs.left_top.y_addr;
>  	dest[(RGA_SRC_CB_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		src_offsets.left_top.u_off;
> +		src_corner_addrs.left_top.u_addr;
>  	dest[(RGA_SRC_CR_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		src_offsets.left_top.v_off;
> +		src_corner_addrs.left_top.v_addr;
>  }
>  
>  static void rga_cmd_set_dst_info(struct rga_ctx *ctx,
> -				 struct rga_addr_offset *offset)
> +				 struct rga_addrs *addrs)
>  {
> -	struct rga_addr_offset *dst_offset;
> -	struct rga_corners_addr_offset offsets;
> +	struct rga_addrs *dst_addrs;
> +	struct rga_corners_addrs corner_addrs;
>  	u32 *dest = ctx->cmdbuf_virt;
>  	unsigned int dst_h, dst_w, dst_x, dst_y;
>  	unsigned int mir_mode = 0;
> @@ -379,15 +378,15 @@ static void rga_cmd_set_dst_info(struct rga_ctx *ctx,
>  	/*
>  	 * Configure the dest framebuffer base address with pixel offset.
>  	 */
> -	offsets = rga_get_addr_offset(&ctx->out, offset, dst_x, dst_y, dst_w, dst_h);
> -	dst_offset = rga_lookup_draw_pos(&offsets, rot_mode, mir_mode);
> +	corner_addrs = rga_get_corner_addrs(&ctx->out, addrs, dst_x, dst_y, dst_w, dst_h);
> +	dst_addrs = rga_lookup_draw_pos(&corner_addrs, rot_mode, mir_mode);
>  
>  	dest[(RGA_DST_Y_RGB_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		dst_offset->y_off;
> +		dst_addrs->y_addr;
>  	dest[(RGA_DST_CB_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		dst_offset->u_off;
> +		dst_addrs->u_addr;
>  	dest[(RGA_DST_CR_BASE_ADDR - RGA_MODE_BASE_REG) >> 2] =
> -		dst_offset->v_off;
> +		dst_addrs->v_addr;
>  }
>  
>  static void rga_cmd_set_mode(struct rga_ctx *ctx)
> @@ -426,8 +425,8 @@ static void rga_cmd_set(struct rga_ctx *ctx,
>  
>  	rga_cmd_set_dst_addr(ctx, dst->dma_desc_pa);
>  
> -	rga_cmd_set_src_info(ctx, &src->offset);
> -	rga_cmd_set_dst_info(ctx, &dst->offset);
> +	rga_cmd_set_src_info(ctx, &src->dma_addrs);
> +	rga_cmd_set_dst_info(ctx, &dst->dma_addrs);
>  
>  	rga_write(rga, RGA_CMD_BASE, ctx->cmdbuf_phy);
>  
> diff --git a/drivers/media/platform/rockchip/rga/rga.h b/drivers/media/platform/rockchip/rga/rga.h
> index cee2e75ea89f1..bf21a57555a59 100644
> --- a/drivers/media/platform/rockchip/rga/rga.h
> +++ b/drivers/media/platform/rockchip/rga/rga.h
> @@ -97,10 +97,10 @@ struct rockchip_rga {
>  	const struct rga_hw *hw;
>  };
>  
> -struct rga_addr_offset {
> -	unsigned int y_off;
> -	unsigned int u_off;
> -	unsigned int v_off;
> +struct rga_addrs {
> +	dma_addr_t y_addr;
> +	dma_addr_t u_addr;
> +	dma_addr_t v_addr;
>  };
>  
>  struct rga_vb_buffer {
> @@ -112,8 +112,8 @@ struct rga_vb_buffer {
>  	dma_addr_t dma_desc_pa;
>  	size_t n_desc;
>  
> -	/* Plane offsets of this buffer into the mapping */
> -	struct rga_addr_offset offset;
> +	/* Plane DMA addresses after the MMU mapping of the buffer */
> +	struct rga_addrs dma_addrs;
>  };
>  
>  static inline struct rga_vb_buffer *vb_to_rga(struct vb2_v4l2_buffer *vb)
> 
> -- 
> 2.54.0
> 
> 


^ permalink raw reply

* Re: [PATCH] arm64: tlb: Flush walk cache when unsharing PMD tables
From: Catalin Marinas @ 2026-05-21 15:15 UTC (permalink / raw)
  To: Zeng Heng
  Cc: will, akpm, npiggin, aneesh.kumar, peterz, linux-kernel,
	wangkefeng.wang, linux-arm-kernel, linux-mm, linux-arch,
	David Hildenbrand
In-Reply-To: <ag8fHYL-S26uO0yZ@arm.com>

On Thu, May 21, 2026 at 04:05:07PM +0100, Catalin Marinas wrote:
> + David H.
> 
> On Thu, May 21, 2026 at 03:30:11PM +0800, Zeng Heng wrote:
> > From: Zeng Heng <zengheng4@huawei.com>
> > 
> > When huge_pmd_unshare() is called to unshare a PMD table, the
> > tlb_unshare_pmd_ptdesc() function sets tlb->unshared_tables=true
> > but the aarch64 tlb_flush() only checked tlb->freed_tables to
> > determine whether to use TLBF_NONE (vae1is, invalidates walk
> > cache) or TLBF_NOWALKCACHE (vale1is, leaf-only).
> > 
> > This caused the stale PMD page table entry to remain in the walk cache
> > after unshare, potentially leading to incorrect page table walks.
> > 
> > Fix by including unshared_tables in the check, so that when
> > unsharing tables, TLBF_NONE is used and the walk cache is properly
> > invalidated.
> > 
> > Here is the detailed distinction between vae1is and vale1is:
> > 
> > | Instruction Combination  | Actual Invalidation Scope                         |
> > | ------------------------ | --------------------------------------------------|
> > | `VAE1IS`  + TTL=`0`      | All entries at all levels (full invalidation)     |
> > | `VAE1IS`  + TTL=`2` (L2) | Non-leaf at Level 0/1 + leaf at Level 2           |
> > | `VALE1IS` + TTL=`0`      | Leaf entries at all levels (non-leaf not cleared) |
> > | `VALE1IS` + TTL=`2` (L2) | Leaf entry at Level 2 only                        |
> > 
> > Signed-off-by: Zeng Heng <zengheng4@huawei.com>
> 
> The fix looks fine but does it need:
> 
> Fixes: 8ce720d5bd91 ("mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather")
> Cc: <stable@vger.kernel.org>
> 
> > ---
> >  arch/arm64/include/asm/tlb.h | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> > index 10869d7731b8..751bd57bc3ba 100644
> > --- a/arch/arm64/include/asm/tlb.h
> > +++ b/arch/arm64/include/asm/tlb.h
> > @@ -53,7 +53,8 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
> >  static inline void tlb_flush(struct mmu_gather *tlb)
> >  {
> >  	struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
> > -	tlbf_t flags = tlb->freed_tables ? TLBF_NONE : TLBF_NOWALKCACHE;
> > +	tlbf_t flags = (tlb->freed_tables || tlb->unshared_tables) ?
> > +			TLBF_NONE : TLBF_NOWALKCACHE;
> >  	unsigned long stride = tlb_get_unmap_size(tlb);
> >  	int tlb_level = tlb_get_level(tlb);

Do we need this as well?

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 10869d7731b8..3f4ab38cfd6e 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -24,7 +24,7 @@ static void tlb_flush(struct mmu_gather *tlb);
 static inline int tlb_get_level(struct mmu_gather *tlb)
 {
 	/* The TTL field is only valid for the leaf entry. */
-	if (tlb->freed_tables)
+	if (tlb->freed_tables || tlb->unshared_tables)
 		return TLBI_TTL_UNKNOWN;
 
 	if (tlb->cleared_ptes && !(tlb->cleared_pmds ||


^ permalink raw reply related

* Re: [PATCH v14 03/44] arm64: RME: Handle Granule Protection Faults (GPFs)
From: Steven Price @ 2026-05-21 15:15 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: kvm, kvmarm, Catalin Marinas, Will Deacon, James Morse,
	Oliver Upton, Suzuki K Poulose, Zenghui Yu, linux-arm-kernel,
	linux-kernel, Joey Gouly, Alexandru Elisei, Christoffer Dall,
	Fuad Tabba, linux-coco, Ganapatrao Kulkarni, Gavin Shan,
	Shanker Donthineni, Alper Gun, Aneesh Kumar K . V, Emi Kisanuki,
	Vishal Annapurve, WeiLin.Chang, Lorenzo.Pieralisi2
In-Reply-To: <86fr3lvtk3.wl-maz@kernel.org>

On 21/05/2026 13:25, Marc Zyngier wrote:
> On Wed, 13 May 2026 14:17:11 +0100,
> Steven Price <steven.price@arm.com> wrote:
>>
>> If the host attempts to access granules that have been delegated for use
>> in a realm these accesses will be caught and will trigger a Granule
>> Protection Fault (GPF).
>>
>> A fault during a page walk signals a bug in the kernel and is handled by
>> oopsing the kernel. A non-page walk fault could be caused by user space
>> having access to a page which has been delegated to the kernel and will
>> trigger a SIGBUS to allow debugging why user space is trying to access a
>> delegated page.
>>
>> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>> Changes since v10:
>>  * Don't call arm64_notify_die() in do_gpf() but simply return 1.
>> Changes since v2:
>>  * Include missing "Granule Protection Fault at level -1"
>> ---
>>  arch/arm64/mm/fault.c | 28 ++++++++++++++++++++++------
>>  1 file changed, 22 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 0f3c5c7ca054..6358ea4787ba 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -905,6 +905,22 @@ static int do_tag_check_fault(unsigned long far, unsigned long esr,
>>  	return 0;
>>  }
>>  
>> +static int do_gpf_ptw(unsigned long far, unsigned long esr, struct pt_regs *regs)
>> +{
>> +	const struct fault_info *inf = esr_to_fault_info(esr);
>> +
>> +	die_kernel_fault(inf->name, far, esr, regs);
>> +	return 0;
>> +}
>> +
>> +static int do_gpf(unsigned long far, unsigned long esr, struct pt_regs *regs)
>> +{
>> +	if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr))
>> +		return 0;
>> +
>> +	return 1;
>> +}
>> +
>>  static const struct fault_info fault_info[] = {
>>  	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
>>  	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
>> @@ -941,12 +957,12 @@ static const struct fault_info fault_info[] = {
>>  	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 32"			},
>>  	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault"		},
>>  	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 34"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 35"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 36"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 37"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 38"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 39"			},
>> -	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 40"			},
>> +	{ do_gpf_ptw,		SIGKILL, SI_KERNEL,	"Granule Protection Fault at level -1" },
>> +	{ do_gpf_ptw,		SIGKILL, SI_KERNEL,	"Granule Protection Fault at level 0" },
>> +	{ do_gpf_ptw,		SIGKILL, SI_KERNEL,	"Granule Protection Fault at level 1" },
>> +	{ do_gpf_ptw,		SIGKILL, SI_KERNEL,	"Granule Protection Fault at level 2" },
>> +	{ do_gpf_ptw,		SIGKILL, SI_KERNEL,	"Granule Protection Fault at level 3" },
>> +	{ do_gpf,		SIGBUS,  SI_KERNEL,	"Granule Protection Fault not on table walk" },
> 
> It wouldn't hurt to align the textual description with what we have
> for other fault syndromes:
> 
> 	"level X granule protection fault (translation table walk)"
> 
> for the PTW-trigger faults, and
> 
> 	"granule protection fault"
> 
> for the non PTW case.

Sure, no problem.

Thanks,
Steve

> 
> Thanks,
> 
> 	M.
> 



^ permalink raw reply

* Re: [PATCH v14 02/44] kvm: arm64: Avoid including linux/kvm_host.h in kvm_pgtable.h
From: Steven Price @ 2026-05-21 15:11 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: kvm, kvmarm, Catalin Marinas, Will Deacon, James Morse,
	Oliver Upton, Suzuki K Poulose, Zenghui Yu, linux-arm-kernel,
	linux-kernel, Joey Gouly, Alexandru Elisei, Christoffer Dall,
	Fuad Tabba, linux-coco, Ganapatrao Kulkarni, Gavin Shan,
	Shanker Donthineni, Alper Gun, Aneesh Kumar K . V, Emi Kisanuki,
	Vishal Annapurve, WeiLin.Chang, Lorenzo.Pieralisi2
In-Reply-To: <86ik8hvz2f.wl-maz@kernel.org>

On 21/05/2026 11:26, Marc Zyngier wrote:
> On Wed, 13 May 2026 14:17:10 +0100,
> Steven Price <steven.price@arm.com> wrote:
>>
>> To avoid future include cycles, drop the linux/kvm_host.h include in
>> kvm_pgtable.h and include two _types.h headers for the types that are
>> actually used. Additionally provide a forward declaration for struct
>> kvm_s2_mmu as it's only used as a pointer in this file.
>>
>> Both pgtable.c and kvm_pkvm.h relied on the indirect inclusion of
>> kvm_host.h, so make that explicit.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>> New patch in v13
>> ---
>>  arch/arm64/include/asm/kvm_pgtable.h | 5 ++++-
>>  arch/arm64/include/asm/kvm_pkvm.h    | 2 +-
>>  arch/arm64/kvm/hyp/pgtable.c         | 1 +
>>  3 files changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
>> index 41a8687938eb..e4770ce2ccf6 100644
>> --- a/arch/arm64/include/asm/kvm_pgtable.h
>> +++ b/arch/arm64/include/asm/kvm_pgtable.h
>> @@ -8,9 +8,12 @@
>>  #define __ARM64_KVM_PGTABLE_H__
>>  
>>  #include <linux/bits.h>
>> -#include <linux/kvm_host.h>
>> +#include <linux/kvm_types.h>
>> +#include <linux/rbtree_types.h>
> 
> I'm surprised by this. Where is the rbtree_types.h requirement coming
> from?

struct kvm_pgtable has a "struct rb_root_cached" for pkvm_mappings.
There's definitely an argument that that's a bit ugly - but this seemed
the cleanest fix from an include perspective.

Thanks,
Steve

> 
> Thanks,
> 
> 	M.
> 



^ permalink raw reply

* Re: [PATCH v14 01/44] kvm: arm64: Include kvm_emulate.h in kvm/arm_psci.h
From: Steven Price @ 2026-05-21 15:11 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: kvm, kvmarm, Suzuki K Poulose, Catalin Marinas, Will Deacon,
	James Morse, Oliver Upton, Zenghui Yu, linux-arm-kernel,
	linux-kernel, Joey Gouly, Alexandru Elisei, Christoffer Dall,
	Fuad Tabba, linux-coco, Ganapatrao Kulkarni, Gavin Shan,
	Shanker Donthineni, Alper Gun, Aneesh Kumar K . V, Emi Kisanuki,
	Vishal Annapurve, WeiLin.Chang, Lorenzo.Pieralisi2
In-Reply-To: <86jysxvze2.wl-maz@kernel.org>

On 21/05/2026 11:19, Marc Zyngier wrote:
> On Wed, 13 May 2026 14:17:09 +0100,
> Steven Price <steven.price@arm.com> wrote:
>>
>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>>
>> Fix a potential build error (like below, when asm/kvm_emulate.h gets
>> included after the kvm/arm_psci.h) by including the missing header file
>> in kvm/arm_psci.h:
>>
>> ./include/kvm/arm_psci.h: In function ‘kvm_psci_version’:
>> ./include/kvm/arm_psci.h:29:13: error: implicit declaration of function
>>    ‘vcpu_has_feature’; did you mean ‘cpu_have_feature’? [-Werror=implicit-function-declaration]
>>    29 |         if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) {
>> 	         |             ^~~~~~~~~~~~~~~~
>> 			       |             cpu_have_feature
>>
>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Signed-off-by: Steven Price <steven.price@arm.com>
> 
> Unrelated to this patch, but really easy to fix: the standard prefix
> for patches targeting KVM/arm64 is:
> 
> "KVM: arm64: [opt subsys:] Something starting with a capital letter"
> 
> where "opt subsys" could be "CCA" where applicable.
> 
> It'd be good to have some consistency.

Sure, I think back when I started this there wasn't great consistency so
I picked up something from git log. I'm happy to change this for the
next posting.

Thanks,
Steve

> 
> Thanks,
> 
> 	M.
> 



^ permalink raw reply

* Re: [PATCH v5 1/5] PCI: host-common: Add helper to determine host bridge D3cold eligibility
From: Manivannan Sadhasivam @ 2026-05-21 15:09 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Krishna Chaitanya Chundru, Jingoo Han, Lorenzo Pieralisi,
	Krzysztof Wilczyński, Rob Herring, Bjorn Helgaas,
	Will Deacon, linux-pci, linux-kernel, linux-arm-msm,
	linux-arm-kernel, jonathanh, bjorn.andersson
In-Reply-To: <20260520202755.GA120626@bhelgaas>

On Wed, May 20, 2026 at 03:27:55PM -0500, Bjorn Helgaas wrote:
> On Tue, May 19, 2026 at 05:39:01PM -0500, Bjorn Helgaas wrote:
> > On Wed, Apr 29, 2026 at 12:12:23PM +0530, Krishna Chaitanya Chundru wrote:
> > > Add a common helper, pci_host_common_d3cold_possible(), to determine
> > > whether PCIe devices under host bridge can safely transition to D3cold.
> > ...
> 
> > > +static int __pci_host_common_d3cold_possible(struct pci_dev *pdev, void *userdata)
> > > +{
> > > +	u32 *flags = userdata;
> > > +	int type;
> > > +
> > > +	/* Ignore conventional PCI devices */
> > > +	if (!pci_is_pcie(pdev))
> > > +		return 0;
> > > +
> > > +	type = pci_pcie_type(pdev);
> > > +	if (type != PCI_EXP_TYPE_ENDPOINT &&
> > > +	    type != PCI_EXP_TYPE_LEG_END &&
> > > +	    type != PCI_EXP_TYPE_RC_END)
> > > +		return 0;
> > 
> > From https://sashiko.dev/#/patchset/20260429-d3cold-v5-0-89e9735b9df6%40oss.qualcomm.com:
> > 
> >   If the topology contains an active conventional PCI device or an
> >   intermediate PCIe switch in PCI_D0, returning 0 here allows
> >   pci_walk_bus() to continue without clearing the
> >   PCI_HOST_D3COLD_ALLOWED flag.
> > 
> >   Does this create a situation where the host bridge might
> >   aggressively power off the link, dropping power to these active
> >   components?
> > 
> > I guess this is intentional, since you have a comment about ignoring
> > conventional PCI devices.  But this does seem like a potential
> > problem.  Why should we ignore switches here?  And I think it's still
> > fairly common to have a PCIe-to-PCI bridge leading to a conventional
> > PCI device, and I don't know why we should ignore them.
> > 
> > The commit log consistently refers to "PCIe" devices and endpoints, so
> > maybe there's some reason that I'm missing.
> > 
> > There are other sashiko comments on this series that I think should
> > also be looked at.
> 
> This series is all in pci/next, so you and Mani can decide on whether
> any sashiko comments need to be addressed.
> 
> Even if there's no code change, I think it'd be nice to have a brief
> comment here about why conventional PCI and switches are ignored.

Looking at the helper again, I think we should allow all PCI/PCIe devices to
take part in the D3Cold check including Switch, Bridge, RP, RCiEP and RC-EC.
Some of them like RCiEP and RC-EC cannot be put into D3Cold by the host
controller drivers individually, but if they are bound to a driver, then there
is a possibility that the driver would want those devices to be kept in D0 for
some reason. In that case, the host controller driver should not broadcast
PME_Turn_Off.

So I've removed the PCIe device checks altogether including the check for
conventional PCI devices in the PCI tree.

- Mani

-- 
மணிவண்ணன் சதாசிவம்


^ permalink raw reply

* Re: [PATCH] arm64: tlb: Flush walk cache when unsharing PMD tables
From: Catalin Marinas @ 2026-05-21 15:05 UTC (permalink / raw)
  To: Zeng Heng
  Cc: will, akpm, npiggin, aneesh.kumar, peterz, linux-kernel,
	wangkefeng.wang, linux-arm-kernel, linux-mm, linux-arch,
	David Hildenbrand
In-Reply-To: <20260521073011.4121277-1-zengheng@huaweicloud.com>

+ David H.

On Thu, May 21, 2026 at 03:30:11PM +0800, Zeng Heng wrote:
> From: Zeng Heng <zengheng4@huawei.com>
> 
> When huge_pmd_unshare() is called to unshare a PMD table, the
> tlb_unshare_pmd_ptdesc() function sets tlb->unshared_tables=true
> but the aarch64 tlb_flush() only checked tlb->freed_tables to
> determine whether to use TLBF_NONE (vae1is, invalidates walk
> cache) or TLBF_NOWALKCACHE (vale1is, leaf-only).
> 
> This caused the stale PMD page table entry to remain in the walk cache
> after unshare, potentially leading to incorrect page table walks.
> 
> Fix by including unshared_tables in the check, so that when
> unsharing tables, TLBF_NONE is used and the walk cache is properly
> invalidated.
> 
> Here is the detailed distinction between vae1is and vale1is:
> 
> | Instruction Combination  | Actual Invalidation Scope                         |
> | ------------------------ | --------------------------------------------------|
> | `VAE1IS`  + TTL=`0`      | All entries at all levels (full invalidation)     |
> | `VAE1IS`  + TTL=`2` (L2) | Non-leaf at Level 0/1 + leaf at Level 2           |
> | `VALE1IS` + TTL=`0`      | Leaf entries at all levels (non-leaf not cleared) |
> | `VALE1IS` + TTL=`2` (L2) | Leaf entry at Level 2 only                        |
> 
> Signed-off-by: Zeng Heng <zengheng4@huawei.com>

The fix looks fine but does it need:

Fixes: 8ce720d5bd91 ("mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather")
Cc: <stable@vger.kernel.org>

> ---
>  arch/arm64/include/asm/tlb.h | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> index 10869d7731b8..751bd57bc3ba 100644
> --- a/arch/arm64/include/asm/tlb.h
> +++ b/arch/arm64/include/asm/tlb.h
> @@ -53,7 +53,8 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
>  static inline void tlb_flush(struct mmu_gather *tlb)
>  {
>  	struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
> -	tlbf_t flags = tlb->freed_tables ? TLBF_NONE : TLBF_NOWALKCACHE;
> +	tlbf_t flags = (tlb->freed_tables || tlb->unshared_tables) ?
> +			TLBF_NONE : TLBF_NOWALKCACHE;
>  	unsigned long stride = tlb_get_unmap_size(tlb);
>  	int tlb_level = tlb_get_level(tlb);
>  
> -- 
> 2.43.0

-- 
Catalin


^ permalink raw reply

* [PATCH v2 39/39] Documentation: KVM: Add the VGICv5 IRS save/restore sequences
From: Sascha Bischoff @ 2026-05-21 15:02 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

When saving/restoring the state of the GICv5 IRS, it is important that
it happens in the correct order. Failure to do so will almost
certainly result in failing to restore a guest that is capable of
handling interrupts correctly.

On a save, the ISTs must be saved prior to saving the guest's memory
as the guest's LPI IST is written to guest memory. Conversely, on
restore the guest's memory must be restored prior to restoring the
ISTs.

It is important to restore the IRS MMIO registers by first restoring
the IRS_IDx registers as they define the capabilities of the IRS, and
are used as part of creating and managing ISTs and SPIs.

In order to restore the ISTs themselves, the IRS_IST_CFGR must be
restored prior to the IRS_IST_BASER. KVM uses these restored registers
when KVM_DEV_ARM_VGIC_GRP_IST is restored to determine whether a guest
LPI IST exists, how large it must be, and where the guest-provided
migration storage lives. The host LPI IST is allocated and populated
as part of restoring KVM_DEV_ARM_VGIC_GRP_IST.

At this stage the remaining MMIO registers can be restored. The SPI
IST gets extracted from a userspace provided buffer, and is
transferred to the host-allocated SPI IST. The LPI IST is extracted
from guest memory, and is written to the host-allocated LPI IST.

As a general rule, the IRS_*_STATUSR registers can be ignored on
restore. They are not userspace writable.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 .../virt/kvm/devices/arm-vgic-v5.rst          | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index 0ee0fe9308fc9..188851f22f9eb 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -232,3 +232,48 @@ Groups:
                    or tracking pending interrupts
       -ETIMEDOUT   An IRS save/VM operation timed out
       ===========  ============================================================
+
+IRS Save Sequence:
+------------------
+
+The following operations are required when saving the virtual GICv5 IRS:
+
+a) Save the ISTs by issuing KVM_GET_DEVICE_ATTR on KVM_DEV_ARM_VGIC_GRP_IST.
+b) Save the IRS MMIO register state by issuing KVM_GET_DEVICE_ATTR on
+   KVM_DEV_ARM_VGIC_GRP_IRS_REGS.
+
+These two steps may be performed in either order. However, the guest memory
+must be serialised after the ISTs have been saved, as saving the LPI IST writes
+the IST state back into guest memory.
+
+IRS Restore Sequence:
+---------------------
+
+The following ordering must be followed when restoring the virtual GICv5 and
+IRS:
+
+a) Create vCPUs.
+b) Provide the IRS base address by issuing KVM_SET_DEVICE_ATTR on
+   KVM_DEV_ARM_VGIC_GRP_ADDR.
+c) Restore the number of SPIs by issuing KVM_SET_DEVICE_ATTR on
+   KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+d) Initialise the GIC - this sets up the default state and creates the SPI
+   IST - by issuing KVM_SET_DEVICE_ATTR on KVM_DEV_ARM_VGIC_GRP_CTRL with
+   KVM_DEV_ARM_VGIC_CTRL_INIT.
+e) Restore guest memory.
+f) Restore the IRS MMIO register state by issuing KVM_SET_DEVICE_ATTR on
+   KVM_DEV_ARM_VGIC_GRP_IRS_REGS. KVM uses the restored IRS_IST_CFGR and
+   IRS_IST_BASER state to allocate the LPI IST during the following step.
+g) Restore the ISTs by issuing KVM_SET_DEVICE_ATTR on
+   KVM_DEV_ARM_VGIC_GRP_IST.
+
+The number of SPIs must be restored before VGIC initialization because
+initialization allocates the SPI state and fixes the SPI range exposed by the
+IRS ID registers.
+
+The various ``*_STATUSR`` registers are observational state in the current KVM
+implementation. Userspace may save them for validation or debugging purposes,
+but they are not required as restore input and do not need to be replayed during
+restore.
+
+Once the restore sequence above has completed, the vCPUs can be started.
-- 
2.34.1

^ permalink raw reply related

* [PATCH v2 38/39] Documentation: KVM: Add docs for KVM_DEV_ARM_VGIC_GRP_IST
From: Sascha Bischoff @ 2026-05-21 15:02 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

Document the IST save/restore userspace interface for the VGICv5
device, KVM_DEV_ARM_VGIC_GRP_IST.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 .../virt/kvm/devices/arm-vgic-v5.rst          | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index 217a1ecfbdc5f..0ee0fe9308fc9 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -177,3 +177,58 @@ Groups:
              64-bit aligned for 64-bit registers
     -EBUSY   VGIC is not initialized, or one or more VCPUs are running
     =======  =================================================================
+
+  KVM_DEV_ARM_VGIC_GRP_IST
+    Attributes:
+      This interface is used to either save the state of the IRS's Interrupt
+      State Tables (ISTs), or to restore them. A get operation saves IST state,
+      and a set operation restores IST state. kvm_device_attr.attr is reserved
+      and must be zero.
+
+      The VGIC must be initialized before using this interface. Restore must be
+      performed before the VM has run. For restore, userspace must have already
+      restored the IRS state and guest memory needed to describe and back any
+      guest LPI IST.
+
+      Saving first asks the IRS to save and quiesce the VM so that interrupt
+      state has been written back to the ISTs. KVM checks that the VM remains
+      quiesced while copying out the SPI and LPI IST state.
+
+      The LPI IST is written to or read from guest-allocated memory. KVM assumes
+      that the guest has provisioned a linear virtual IST through IRS_IST_CFGR
+      and IRS_IST_BASER, and uses that guest memory as the LPI IST migration
+      storage. If the guest has not enabled an LPI IST, there is no LPI IST
+      state to save or restore.
+
+      The SPI IST has no guest-owned backing memory, so userspace must provide a
+      buffer through kvm_device_attr.addr for both get and set operations. The
+      buffer contains one little-endian 32-bit IST entry per exposed SPI, in SPI
+      number order. Its size is:
+
+        nr_spis * sizeof(__u32)
+
+      where nr_spis is the value returned by KVM_DEV_ARM_VGIC_GRP_NR_IRQS for
+      the VGICv5 device. For VGICv5 this value is the number of SPIs, not the
+      total number of interrupts. Since VGICv5 currently exposes at least 32
+      SPIs, kvm_device_attr.addr must be non-zero.
+
+    Errors:
+
+      ===========  ============================================================
+      -EBUSY       One or more VCPUs are running, the VGIC is not initialized,
+                   restore was requested after the VM has run, an LPI IST
+                   already exists, or the save operation completed but the VM
+                   did not remain quiesced
+      -EINVAL      A userspace SPI IST buffer was not supplied when one is
+                   required, or an internal VM table operation rejected the VM
+                   state
+      -ENOENT      A userspace SPI IST buffer was supplied, but there is no SPI
+                   IST to serialise/unserialise
+      -EFAULT      Invalid user pointer for attr->addr, or the guest memory
+                   backing the LPI IST could not be accessed
+      -ENXIO       Required per-VM VGICv5/IST backing state is missing or
+                   inconsistent
+      -ENOMEM      Restoring IST state failed while allocating the host LPI IST
+                   or tracking pending interrupts
+      -ETIMEDOUT   An IRS save/VM operation timed out
+      ===========  ============================================================
-- 
2.34.1


^ permalink raw reply related

* [PATCH v2 37/39] Documentation: KVM: Add KVM_DEV_ARM_VGIC_GRP_IRS_REGS to VGICv5 docs
From: Sascha Bischoff @ 2026-05-21 15:01 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

Document the KVM_DEV_ARM_VGIC_GRP_IRS_REGS attribute group used to
read and write the virtual IRS's MMIO register state. This provides a
GICv5-specific interface for state that is conceptually similar to the
VGICv3 ITS register interface, but uses IRS terminology instead of ITS.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 .../virt/kvm/devices/arm-vgic-v5.rst          | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index e2045b09f27d0..217a1ecfbdc5f 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -141,3 +141,39 @@ Groups:
     ICC_CR0_EL1
     ICC_PCR_EL1
     =======================  ===================================================
+
+  KVM_DEV_ARM_VGIC_GRP_IRS_REGS
+    Attributes:
+      The attr field of kvm_device_attr encodes the offset of the IRS register,
+      relative to the IRS CONFIG_FRAME base address. This is the address that
+      was provided via KVM_VGIC_V5_ADDR_TYPE_IRS when creating VGICv5 in the
+      first place.
+
+      kvm_device_attr.addr points to a __u64 value whatever the width
+      of the addressed register (32/64 bits). 64 bit registers can only
+      be accessed with full length.
+
+      Writes to read-only registers are ignored by the kernel except for:
+
+      - IRS_IDR0 - IRS_IDR2 and IRS_IDR5 - IRS_IDR7: These are sanity checked to
+        ensure that they match a sane config.
+      - IRS_IDR3 and IRS_IDR4: These are RAZ/WI as nested virtualization is not
+        supported.
+
+      For registers without dedicated userspace accessors, getting or setting a
+      register uses the same emulated MMIO handlers as guest reads/writes.
+      Dedicated userspace accessors may instead save or restore migration state
+      without triggering guest-visible side effects. For example, restoring
+      IRS_IST_BASER only restores the emulated register state; any host LPI IST
+      allocation based on the restored IRS_IST_CFGR and IRS_IST_BASER state
+      happens when KVM_DEV_ARM_VGIC_GRP_IST is restored.
+
+  Errors:
+
+    =======  =================================================================
+    -ENXIO   Offset does not correspond to any supported register
+    -EFAULT  Invalid user pointer for attr->addr
+    -EINVAL  Offset is not 32-bit aligned for 32-bit MMIO registers, or not
+             64-bit aligned for 64-bit registers
+    -EBUSY   VGIC is not initialized, or one or more VCPUs are running
+    =======  =================================================================
-- 
2.34.1


^ permalink raw reply related

* [PATCH v2 36/39] Documentation: KVM: Document KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS for VGICv5
From: Sascha Bischoff @ 2026-05-21 15:01 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

The virtual GICv5 adopts the same mechanism as GICv3 for userspace
reads and writes of the system registers, albeit operating on a
different set of registers, of course.

Document KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS for GICv5 in the VGICv5
documentation, explicitly calling out the registers it operates
on. The main body of documentation has been directly copied from the
VGICv3 documentation as it has identical operation.

One key thing to note is that for two sets of GICv5 registers - those
pertaining to Active and Pending state - the operation of the
interface is different to how the actual registers operate. Both of
these sets of registers have C and S variants (to clear and set bits,
respectively) in hardware. However for this interface, we ONLY
implement the S variant, AND treat it as a raw write. This simplifies
the act of reading or writing the state.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 .../virt/kvm/devices/arm-vgic-v5.rst          | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
index 5c6323d82f784..e2045b09f27d0 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v5.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -75,3 +75,69 @@ Groups:
     -EFAULT  Invalid guest ram access
     -EBUSY   One or more VCPUS are running
     =======  ========================================================
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
+   Attributes:
+
+    The attr field of kvm_device_attr encodes two values::
+
+      bits:     | 63      ....       32 | 31  ....  16 | 15  ....  0 |
+      values:   |         mpidr         |      RES     |    instr    |
+
+    The mpidr field encodes the CPU ID based on the affinity information in the
+    architecture defined MPIDR, and the field is encoded as follows::
+
+      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
+      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
+
+    The instr field encodes the system register to access based on the fields
+    defined in the A64 instruction set encoding for system register access
+    (RES means the bits are reserved for future use and should be zero)::
+
+      | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
+      |   Op 0    |    Op1    |    CRn   |   CRm   |   Op2   |
+
+    All system regs accessed through this API are (rw, 64-bit) and
+    kvm_device_attr.addr points to a __u64 value.
+
+    KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS accesses the CPU interface registers for the
+    CPU specified by the mpidr field.
+
+    The available registers are:
+
+    =======================  ===================================================
+    ICC_ICSR_EL1
+    ICC_PPI_ENABLER0_EL1
+    ICC_PPI_ENABLER1_EL1
+    ICC_PPI_SACTIVER0_EL1    ICC_PPI_CACTIVER0_EL1 is not supported. Writes to
+                             ICC_PPI_SACTIVER0_EL1 are treated as RAW writes of
+                             the underlying state.
+    ICC_PPI_SACTIVER1_EL1    ICC_PPI_CACTIVER1_EL1 is not supported. Writes to
+                             ICC_PPI_SACTIVER1_EL1 are treated as RAW writes of
+                             the underlying state.
+    ICC_PPI_SPENDR0_EL1      ICC_PPI_CPENDR0_EL1 is not supported. Writes to
+                             ICC_PPI_SPENDR0_EL1 are treated as RAW writes of
+                             the underlying state.
+    ICC_PPI_SPENDR1_EL1      ICC_PPI_CPENDR1_EL1 is not supported. Writes to
+                             ICC_PPI_SPENDR1_EL1 are treated as RAW writes of
+                             the underlying state.
+    ICC_PPI_PRIORITYR0_EL1
+    ICC_PPI_PRIORITYR1_EL1
+    ICC_PPI_PRIORITYR2_EL1
+    ICC_PPI_PRIORITYR3_EL1
+    ICC_PPI_PRIORITYR4_EL1
+    ICC_PPI_PRIORITYR5_EL1
+    ICC_PPI_PRIORITYR6_EL1
+    ICC_PPI_PRIORITYR7_EL1
+    ICC_PPI_PRIORITYR8_EL1
+    ICC_PPI_PRIORITYR9_EL1
+    ICC_PPI_PRIORITYR10_EL1
+    ICC_PPI_PRIORITYR11_EL1
+    ICC_PPI_PRIORITYR12_EL1
+    ICC_PPI_PRIORITYR13_EL1
+    ICC_PPI_PRIORITYR14_EL1
+    ICC_PPI_PRIORITYR15_EL1
+    ICC_APR_EL1
+    ICC_CR0_EL1
+    ICC_PCR_EL1
+    =======================  ===================================================
-- 
2.34.1


^ permalink raw reply related

* [PATCH v2 35/39] KVM: arm64: gic-v5: Implement save/restore mechanisms for ISTs
From: Sascha Bischoff @ 2026-05-21 15:01 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

When running a GICv5 VM, there are up to two ISTs that must be saved
or restored when migrating a VM.

The SPI IST is allocated by the hypervisor, as the guest presumes the
memory for the SPI state is allocated by the hardware. The LPI IST, on
the other hand, is allocated by the guest in the event that it wishes
to use LPIs. We shadow the guest's LPI IST in KVM, and therefore the
guest's memory is never directly used by the GICv5 hardware. Hence, in
both cases, the in-use ISTs are allocated by the hypervisor.

As there is no guest-allocated memory for the SPI IST, the state of
this must be saved by the VMM. Therefore, the VMM must provide a
memory buffer large enough to store/restore the SPI IST (32-bits per
SPI).

The LPI IST, if present, is stored into guest memory as the guest has
already allocated storage under the assumption that it would be used
by the GIC. Each IST Entry is written back to guest memory (skipping
metadata sections) on a save, or restored from guest memory on a
restore. The guest is only allowed to create a linear IST, so there's
a sufficiently large region of memory that is contiguous in GPA space.

On a save, the VM itself is quiesced using IRS_SAVE_VMR - this ensures
that the hardware has written all interrupt state back to the
ISTs. Following the save operation, the IRS_SAVE_VM_STATUSR is checked
to ensure that the guest has remained quiescent. In the event that it
has not, an error is propagated back to the VMM such that it can retry
the save.

On restore, the VM is first made invalid - writing to any of the
tables is not allowed while they are valid - and then the SPI and LPI
ISTs are restored (if required) before making the VM valid again. As
part of restoring the ISTs, any pending interrupts are tracked, and
IST pending state is cleared. Once the VM is made valid, these tracked
interrupts are made pending again via the GIC VDPEND system
instruction.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 arch/arm64/include/uapi/asm/kvm.h       |   1 +
 arch/arm64/kvm/vgic/vgic-irs-v5.c       |  20 +
 arch/arm64/kvm/vgic/vgic-kvm-device.c   |  13 +
 arch/arm64/kvm/vgic/vgic-v5-tables.c    | 645 ++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic-v5-tables.h    |  12 +
 arch/arm64/kvm/vgic/vgic-v5.c           | 286 +++++++++++
 arch/arm64/kvm/vgic/vgic.h              |   3 +
 tools/arch/arm64/include/uapi/asm/kvm.h |   1 +
 8 files changed, 981 insertions(+)

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 710a0d267347d..1b9bbeab18a4e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -423,6 +423,7 @@ enum {
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
 #define KVM_DEV_ARM_VGIC_GRP_IRS_REGS	10
+#define KVM_DEV_ARM_VGIC_GRP_IST	11
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/arch/arm64/kvm/vgic/vgic-irs-v5.c b/arch/arm64/kvm/vgic/vgic-irs-v5.c
index b7808555adc82..92f646036439f 100644
--- a/arch/arm64/kvm/vgic/vgic-irs-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-irs-v5.c
@@ -945,6 +945,26 @@ int kvm_vgic_v5_irs_init(struct kvm *kvm, unsigned int nr_spis)
 	return 0;
 }
 
+int vgic_v5_irs_lpi_ist_id_bits(struct kvm *kvm, unsigned int *id_bits)
+{
+	struct vgic_v5_irs *irs = kvm->arch.vgic.vgic_v5_irs_data;
+
+	if (WARN_ON_ONCE(!irs))
+		return -ENXIO;
+
+	if (!irs->ist_baser.valid)
+		return 0;
+
+	if (!vgic_v5_ist_cfgr_valid(irs)) {
+		kvm_err("Guest programmed invalid IRS_IST_CFGR\n");
+		return -EINVAL;
+	}
+
+	*id_bits = irs->ist_cfgr.lpi_id_bits;
+
+	return 1;
+}
+
 int vgic_v5_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	const struct vgic_register_region *region;
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index cab3d6db070ac..afea89b99411f 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -902,6 +902,11 @@ static int vgic_v5_set_attr(struct kvm_device *dev,
 	switch (attr->group) {
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_IST:
+		if (attr->attr)
+			return -ENXIO;
+
+		return vgic_v5_irs_restore_ists(dev->kvm, attr);
 	case KVM_DEV_ARM_VGIC_GRP_IRS_REGS:
 		fallthrough;
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
@@ -930,6 +935,11 @@ static int vgic_v5_get_attr(struct kvm_device *dev,
 	switch (attr->group) {
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_IST:
+		if (attr->attr)
+			return -ENXIO;
+
+		return vgic_v5_irs_save_ists(dev->kvm, attr);
 	case KVM_DEV_ARM_VGIC_GRP_IRS_REGS:
 		fallthrough;
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
@@ -979,6 +989,9 @@ static int vgic_v5_has_attr(struct kvm_device *dev,
 		default:
 			return -ENXIO;
 		}
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_IST:
+		return attr->attr ? -ENXIO : 0;
 	default:
 		return -ENXIO;
 	}
diff --git a/arch/arm64/kvm/vgic/vgic-v5-tables.c b/arch/arm64/kvm/vgic/vgic-v5-tables.c
index 2df470d29d64a..b499731aa4ec4 100644
--- a/arch/arm64/kvm/vgic/vgic-v5-tables.c
+++ b/arch/arm64/kvm/vgic/vgic-v5-tables.c
@@ -59,6 +59,20 @@ static DEFINE_XARRAY(vm_info);
 #define GICV5_VPED_ADDR_SHIFT		3ULL
 #define GICV5_VPED_ADDR			GENMASK_ULL(55, 3)
 
+/* L2 Interrupt State Table Entry */
+#define GICV5_ISTL2E_PENDING		BIT(0)
+#define GICV5_ISTL2E_ACTIVE		BIT(1)
+#define GICV5_ISTL2E_HM			BIT(2)
+#define GICV5_ISTL2E_ENABLE		BIT(3)
+#define GICV5_ISTL2E_IRM		BIT(4)
+#define GICV5_ISTL2E_HWU		GENMASK(10, 9)
+#define GICV5_ISTL2E_PRIORITY		GENMASK(15, 11)
+#define GICV5_ISTL2E_IAFFID		GENMASK(31, 16)
+
+#define GICV5_ISTE_SIZE(istsz)		BIT((istsz) + 2)
+#define GICV5_LINEAR_IST_SIZE(id_bits, istsz)	\
+	(BIT(id_bits) * GICV5_ISTE_SIZE(istsz))
+
 /*
  * The LPI and SPI configuration is stored in the 2nd and 3rd 64-bit chunks of
  * the VMTE (0-based). We call this a section here in an attempt to simplify the
@@ -67,6 +81,26 @@ static DEFINE_XARRAY(vm_info);
 #define GICV5_VMTEL2_LPI_SECTION	2
 #define GICV5_VMTEL2_SPI_SECTION	3
 
+struct vgic_v5_ist_desc {
+	struct vgic_v5_vm_info	*vmi;
+	void			*base;
+	unsigned int		id_bits;
+	unsigned int		istsz;
+	unsigned int		l2sz;
+	size_t			iste_size;
+	bool			present;
+};
+
+struct vgic_v5_two_level_ist_shape {
+	size_t	l1_entries;
+	size_t	l2_entries;
+};
+
+struct vgic_v5_pending_irq {
+	u32			irq;
+	struct list_head	next;
+};
+
 static int vgic_v5_alloc_linear_ist(struct kvm *kvm, bool spi_ist,
 				    unsigned int id_bits,
 				    unsigned int istsz);
@@ -100,6 +134,22 @@ static void vgic_v5_clean_inval(void *va, size_t size)
 		dcache_clean_inval_poc(base, base + size);
 }
 
+static void vgic_v5_drain_pending_irqs(struct kvm *kvm,
+				       struct vgic_v5_vm_info *vmi,
+				       bool reinject)
+{
+	struct vgic_v5_pending_irq *pirq, *tmp;
+
+	list_for_each_entry_safe(pirq, tmp, &vmi->pending_irqs, next) {
+		if (reinject)
+			kvm_call_hyp(__vgic_v5_vdpend, pirq->irq, true,
+				     vgic_v5_vm_id(kvm));
+
+		list_del(&pirq->next);
+		kfree(pirq);
+	}
+}
+
 /*
  * Create a linear VM Table. Directly using the number of entries supplied as
  * the size of an L2 VMTE (32 bytes) guarantees that our allocation is aligned per
@@ -440,6 +490,13 @@ int vgic_v5_vmte_init(struct kvm *kvm)
 	if (ret)
 		goto out_fail;
 
+	/*
+	 * If we are restoring the state of a guest, we need to re-inject any
+	 * IRQs that were pending when the state of the guest was originally
+	 * saved. We use the pending_irqs list for this.
+	 */
+	INIT_LIST_HEAD(&vmi->pending_irqs);
+
 	/* Allocate and assign the VM Descriptor, if required. */
 	if (vmt_info->vmd_size != 0) {
 		vmd = kzalloc(vmt_info->vmd_size, GFP_KERNEL);
@@ -544,6 +601,9 @@ int vgic_v5_vmte_release(struct kvm *kvm)
 	kfree(vmi->vpet_base);
 	kfree(vmi->vmd_base);
 
+	/* Unlikely, but possible. Avoid leaking the memory. */
+	vgic_v5_drain_pending_irqs(kvm, vmi, false);
+
 	/* If we have an LPI IST, free it */
 	if (vmi->h_lpi_ist) {
 		ret = vgic_v5_lpi_ist_free(kvm);
@@ -1112,6 +1172,18 @@ static int vgic_v5_spi_ist_free(struct kvm *kvm)
 	return vgic_v5_linear_ist_free(kvm, true);
 }
 
+int vgic_v5_lpi_ist_exists(struct kvm *kvm)
+{
+	u16 vm_id = vgic_v5_vm_id(kvm);
+	struct vgic_v5_vm_info *vmi;
+
+	vmi = xa_load(&vm_info, vm_id);
+	if (WARN_ON_ONCE(!vmi))
+		return -ENXIO;
+
+	return !!vmi->h_lpi_ist;
+}
+
 /*
  * Allocate an IST for LPIs.
  *
@@ -1184,3 +1256,576 @@ int vgic_v5_lpi_ist_free(struct kvm *kvm)
 	else
 		return vgic_v5_two_level_ist_free(kvm, false);
 }
+
+static struct vgic_v5_two_level_ist_shape
+vgic_v5_two_level_ist_shape(const struct vgic_v5_ist_desc *ist)
+{
+	struct vgic_v5_two_level_ist_shape shape;
+	size_t l2bits, n;
+
+	l2bits = (10 - ist->istsz) + (2 * ist->l2sz);
+	n = max(2, ist->id_bits - l2bits + 3 - 1);
+
+	shape.l1_entries = BIT(n + 1) / GICV5_IRS_ISTL1E_SIZE;
+	shape.l2_entries = BIT(l2bits);
+
+	return shape;
+}
+
+static int vgic_v5_read_vm_ist_desc(struct kvm *kvm, unsigned int section,
+				    struct vgic_v5_ist_desc *ist)
+{
+	u16 vm_id = vgic_v5_vm_id(kvm);
+	struct vmtl2_entry *vmte;
+	u64 vmte_ist_section;
+
+	vmte = vgic_v5_get_l2_vmte(vm_id);
+	if (IS_ERR(vmte))
+		return PTR_ERR(vmte);
+
+	vgic_v5_clean_inval(vmte, sizeof(*vmte));
+	vmte_ist_section = le64_to_cpu(READ_ONCE(vmte->val[section]));
+
+	ist->id_bits = FIELD_GET(GICV5_VMTEL2E_IST_ID_BITS, vmte_ist_section);
+	ist->istsz = FIELD_GET(GICV5_VMTEL2E_IST_ISTSZ, vmte_ist_section);
+	ist->l2sz = FIELD_GET(GICV5_VMTEL2E_IST_L2SZ, vmte_ist_section);
+	ist->iste_size = GICV5_ISTE_SIZE(ist->istsz);
+
+	return vmte_ist_section & GICV5_VMTEL2E_IST_VALID;
+}
+
+static int vgic_v5_get_spi_ist_desc(struct kvm *kvm, bool userspace_buf,
+				    struct vgic_v5_ist_desc *ist)
+{
+	u16 vm_id = vgic_v5_vm_id(kvm);
+	int ret;
+
+	memset(ist, 0, sizeof(*ist));
+
+	ist->vmi = xa_load(&vm_info, vm_id);
+	if (WARN_ON_ONCE(!ist->vmi))
+		return -ENXIO;
+
+	ret = vgic_v5_read_vm_ist_desc(kvm, GICV5_VMTEL2_SPI_SECTION, ist);
+	if (ret < 0)
+		return ret;
+
+	ist->base = ist->vmi->h_spi_ist;
+
+	/* We don't have SPIs, but userspace is trying to save/restore them. */
+	if (!ist->base && userspace_buf)
+		return -ENOENT;
+
+	/* We have SPIs but userspace isn't trying to save/restore them. */
+	if (ist->base && !userspace_buf)
+		return -EINVAL;
+
+	/* No SPIs and no userspace buffer: nothing to do. */
+	if (!ist->base && !userspace_buf)
+		return 0;
+
+	ist->present = true;
+	return 0;
+}
+
+static int vgic_v5_get_lpi_ist_desc(struct kvm *kvm,
+				    struct vgic_v5_ist_desc *ist)
+{
+	u16 vm_id = vgic_v5_vm_id(kvm);
+	bool guest_valid, host_valid;
+	int ret;
+
+	memset(ist, 0, sizeof(*ist));
+
+	ist->vmi = xa_load(&vm_info, vm_id);
+	if (WARN_ON_ONCE(!ist->vmi))
+		return -ENXIO;
+
+	ret = vgic_v5_read_vm_ist_desc(kvm, GICV5_VMTEL2_LPI_SECTION, ist);
+	if (ret < 0)
+		return ret;
+
+	host_valid = ret;
+	guest_valid = kvm->arch.vgic.vgic_v5_irs_data->ist_baser.valid;
+	ist->base = ist->vmi->h_lpi_ist;
+
+	/* If there is no IST to save/restore, return without error. */
+	if (!guest_valid && !host_valid && !ist->base)
+		return 0;
+
+	/* Mismatched combination of valid state */
+	if (!guest_valid || !host_valid || !ist->base)
+		return -ENXIO;
+
+	if (ist->vmi->h_lpi_ist_structure && !ist->vmi->h_lpi_l2_ists)
+		return -ENXIO;
+
+	ist->present = true;
+	return 0;
+}
+
+/*
+ * Save the SPI IST to userspace-provided memory.
+ *
+ * Only the architected 32-bit ISTE state is exposed to userspace. Host
+ * metadata is skipped when striding through the linear host SPI IST.
+ */
+int vgic_v5_save_spi_ist(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
+	struct vgic_v5_ist_desc ist;
+	__le32 h_iste;
+	int ret;
+
+	ret = vgic_v5_get_spi_ist_desc(kvm, !!attr->addr, &ist);
+	if (ret || !ist.present)
+		return ret;
+
+	vgic_v5_clean_inval(ist.base,
+			    GICV5_LINEAR_IST_SIZE(ist.id_bits, ist.istsz));
+
+	/* The host SPI IST is always linear. */
+	for (unsigned int i = 0; i < kvm->arch.vgic.nr_spis; ++i) {
+		/*
+		 * Only the low 32 bits are saved. Any host metadata after the
+		 * architected ISTE is skipped by the host ISTE stride.
+		 */
+		__le32 *h_iste_addr = ist.base + i * ist.iste_size;
+
+		h_iste = READ_ONCE(*h_iste_addr);
+		ret = put_user(h_iste, uaddr);
+		if (ret)
+			return ret;
+
+		uaddr++;
+	}
+
+	return 0;
+}
+
+/*
+ * Save a Linear host LPI IST to guest memory.
+ *
+ * Only the architected 32-bit ISTE state is stored. Host metadata is skipped
+ * when striding through the host's LPI IST.
+ *
+ * The guest's LPI IST is always Linear.
+ */
+static int vgic_v5_save_linear_lpi_ist(struct kvm *kvm,
+				       const struct vgic_v5_ist_desc *ist,
+				       gpa_t g_entry_addr)
+{
+	size_t h_l2_index, h_l2_entries;
+	__le32 h_iste;
+	int ret;
+
+	h_l2_entries = BIT(ist->id_bits);
+
+	vgic_v5_clean_inval(ist->base,
+			    GICV5_LINEAR_IST_SIZE(ist->id_bits, ist->istsz));
+
+	for (h_l2_index = 0; h_l2_index < h_l2_entries; h_l2_index++) {
+		__le32 *h_iste_addr = ist->base + h_l2_index * ist->iste_size;
+
+		h_iste = *h_iste_addr;
+		ret = vgic_write_guest_lock(kvm, g_entry_addr, &h_iste,
+					    sizeof(h_iste));
+		if (ret)
+			return ret;
+
+		g_entry_addr += sizeof(h_iste);
+	}
+
+	return 0;
+}
+
+/*
+ * Save a Two-level host LPI IST to guest memory.
+ *
+ * Only the architected 32-bit ISTE state is stored. Host metadata is skipped
+ * when striding through the host's IST.
+ *
+ * The guest's LPI IST is always Linear.
+ */
+static int vgic_v5_save_two_level_lpi_ist(struct kvm *kvm,
+					  const struct vgic_v5_ist_desc *ist,
+					  gpa_t g_entry_addr)
+{
+	struct vgic_v5_two_level_ist_shape shape;
+	size_t h_l1_index, h_l2_index;
+	void *h_l2_ist_base;
+	__le32 h_iste;
+	int ret;
+
+	shape = vgic_v5_two_level_ist_shape(ist);
+
+	vgic_v5_clean_inval(ist->base,
+			    shape.l1_entries * sizeof(*ist->vmi->h_lpi_ist));
+
+	for (h_l1_index = 0; h_l1_index < shape.l1_entries; h_l1_index++) {
+		u64 l1_iste;
+
+		/*
+		 * Host L2 ISTs are preallocated. Any invalid L1 entry means the
+		 * host IST state is inconsistent.
+		 */
+		l1_iste = le64_to_cpu(READ_ONCE(ist->vmi->h_lpi_ist[h_l1_index]));
+		if (!FIELD_GET(GICV5_ISTL1E_VALID, l1_iste))
+			return -ENXIO;
+
+		h_l2_ist_base = ist->vmi->h_lpi_l2_ists[h_l1_index];
+		if (!h_l2_ist_base)
+			return -ENXIO;
+
+		vgic_v5_clean_inval(h_l2_ist_base,
+				    shape.l2_entries * ist->iste_size);
+
+		for (h_l2_index = 0; h_l2_index < shape.l2_entries; h_l2_index++) {
+			h_iste = *(__le32 *)(h_l2_ist_base +
+					     h_l2_index * ist->iste_size);
+
+			ret = vgic_write_guest_lock(kvm, g_entry_addr,
+						    &h_iste, sizeof(h_iste));
+			if (ret)
+				return ret;
+
+			g_entry_addr += sizeof(__le32);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Save the LPI IST to guest memory
+ *
+ * The guest LPI IST is exposed as a linear GPA range. The host LPI IST may be
+ * linear or two-level, so host iteration depends on the allocated host shape.
+ *
+ * Only the architected 32-bit ISTE state is saved. Host metadata is rebuilt on
+ * restore.
+ */
+int vgic_v5_save_lpi_ist(struct kvm *kvm)
+{
+	struct vgic_v5_ist_desc ist;
+	gpa_t g_entry_addr;
+	int ret;
+
+	ret = vgic_v5_get_lpi_ist_desc(kvm, &ist);
+	if (ret || !ist.present)
+		return ret;
+
+	/* The guest LPI IST is saved through its linear GPA range. */
+	g_entry_addr = kvm->arch.vgic.vgic_v5_irs_data->ist_baser.addr;
+
+	if (!ist.vmi->h_lpi_ist_structure)
+		return vgic_v5_save_linear_lpi_ist(kvm, &ist, g_entry_addr);
+
+	return vgic_v5_save_two_level_lpi_ist(kvm, &ist, g_entry_addr);
+}
+
+/*
+ * Track any SPIs and LPIs that were marked as pending at the point where the
+ * IST was restored.
+ *
+ * Restored pending state is cleared from the host IST and replayed with VDPEND
+ * before the VM first runs.
+ */
+static int vgic_v5_track_pending_irq(struct list_head *pending_irqs, u32 intid,
+				     u32 type)
+{
+	struct vgic_v5_pending_irq *pirq;
+
+	pirq = kzalloc_obj(*pirq, GFP_KERNEL);
+	if (!pirq)
+		return -ENOMEM;
+
+	/* Encode the interrupt as a GICv5 IntID. */
+	pirq->irq = FIELD_PREP(GICV5_HWIRQ_TYPE, type) |
+		    FIELD_PREP(GICV5_HWIRQ_ID, intid);
+
+	INIT_LIST_HEAD(&pirq->next);
+	list_add_tail(&pirq->next, pending_irqs);
+
+	return 0;
+}
+
+/*
+ * Sanitise one restored ISTE in place.
+ *
+ * If the entry is pending, the interrupt is first recorded on @pending_irqs so
+ * that it can be replayed later with VDPEND. The pending and HWU bits are then
+ * both cleared: HWU is for hardware use and must not survive migration.
+ *
+ * Returns 0 on success, or the error from vgic_v5_track_pending_irq(), in
+ * which case *iste is left unmodified.
+ */
+static int vgic_v5_process_iste(__le32 *iste, struct list_head *pending_irqs,
+				u32 intid, u32 type)
+{
+	u32 raw = le32_to_cpu(READ_ONCE(*iste));
+
+	if (raw & GICV5_ISTL2E_PENDING) {
+		int err = vgic_v5_track_pending_irq(pending_irqs, intid, type);
+
+		if (err)
+			return err;
+	}
+
+	raw &= ~(GICV5_ISTL2E_PENDING | GICV5_ISTL2E_HWU);
+	WRITE_ONCE(*iste, cpu_to_le32(raw));
+
+	return 0;
+}
+
+/*
+ * Mirror the handling mode of a restored SPI ISTE into KVM's vgic_irq.
+ *
+ * During normal operation this is handled via the IRS's MMIO interface, but on
+ * restore it has to be done explicitly: HM set selects level, HM clear selects
+ * edge. A missing vgic_irq triggers a one-time warning and is otherwise
+ * ignored.
+ */
+static void vgic_v5_restore_spi_config(struct kvm *kvm, __le32 iste, u32 spi)
+{
+	bool level = !!(le32_to_cpu(iste) & GICV5_ISTL2E_HM);
+	struct vgic_irq *irq = vgic_get_irq(kvm, vgic_v5_make_spi(spi));
+
+	if (WARN_ON_ONCE(!irq))
+		return;
+
+	scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+		irq->config = level ? VGIC_CONFIG_LEVEL : VGIC_CONFIG_EDGE;
+
+	vgic_put_irq(kvm, irq);
+}
+
+/*
+ * Restore the SPI IST from userspace-provided buffer to the host-allocated IST.
+ *
+ * Userspace supplies the architected 32-bit SPI ISTEs, only.
+ *
+ * attr->addr is used as a user pointer to an array of 32-bit entries, one per
+ * SPI (kvm->arch.vgic.nr_spis in total), consumed in order with get_user().
+ *
+ * Returns 0 on success, or when the descriptor lookup succeeds but reports
+ * that no SPI IST is present. Errors from the descriptor lookup, get_user() or
+ * vgic_v5_process_iste() are returned immediately: host entries written before
+ * the failure stay written, and interrupts already queued on pending_irqs are
+ * not removed here.
+ */
+int vgic_v5_restore_spi_ist(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
+	struct vgic_v5_ist_desc ist;
+	__le32 h_iste;
+	int ret;
+
+	ret = vgic_v5_get_spi_ist_desc(kvm, !!attr->addr, &ist);
+	if (ret || !ist.present)
+		return ret;
+
+	/*
+	 * The saved SPI IST is linear and contains only architected 32-bit
+	 * ISTEs. The host ISTE stride skips host metadata sections.
+	 */
+	for (unsigned int i = 0; i < kvm->arch.vgic.nr_spis; i++) {
+		void *h_iste_addr = ist.base + i * ist.iste_size;
+
+		/* Fetch the next 32-bit entry from the userspace buffer. */
+		ret = get_user(h_iste, uaddr);
+		if (ret)
+			return ret;
+
+		/*
+		 * Sanitise the IST, clearing HWU & pending fields. Pending
+		 * state is later replayed via GIC VDPEND.
+		 */
+		ret = vgic_v5_process_iste(&h_iste, &ist.vmi->pending_irqs,
+					   i, GICV5_HWIRQ_TYPE_SPI);
+		if (ret)
+			return ret;
+
+		/* Update KVM's SPI level/edge tracking to match the ISTE */
+		vgic_v5_restore_spi_config(kvm, h_iste, i);
+
+		/*
+		 * Zero the full ISTE (incl metadata), and write back the
+		 * non-metadata region, only.
+		 */
+		memset(h_iste_addr, 0, ist.iste_size);
+		WRITE_ONCE(*(__le32 *)h_iste_addr, h_iste);
+		vgic_v5_clean_inval(h_iste_addr, ist.iste_size);
+
+		/* One u32 consumed from the userspace array. */
+		uaddr++;
+	}
+
+	return 0;
+}
+
+/*
+ * Restore the LPI IST from guest memory to the Linear host-allocated LPI IST.
+ *
+ * The guest LPI IST is restored from a linear GPA range.
+ *
+ * Only the lower 32-bits of each ISTE are restored.
+ *
+ * BIT(ist->id_bits) entries are processed. For each one a 32-bit value is read
+ * from @g_entry_addr, which then advances by sizeof(__le32), while the host
+ * entry address advances by ist->iste_size.
+ *
+ * Returns 0 on success, or the first error from kvm_read_guest_lock() or
+ * vgic_v5_process_iste(). Entries handled before a failure stay written and
+ * any interrupts already queued on pending_irqs are left on the list.
+ */
+static int vgic_v5_restore_linear_lpi_ist(struct kvm *kvm,
+					  const struct vgic_v5_ist_desc *ist,
+					  gpa_t g_entry_addr)
+{
+	size_t h_l2_index, h_l2_entries;
+	__le32 h_iste;
+	int ret;
+
+	/* A linear table holds one entry per LPI ID. */
+	h_l2_entries = BIT(ist->id_bits);
+
+	for (h_l2_index = 0; h_l2_index < h_l2_entries; h_l2_index++) {
+		void *h_iste_addr = ist->base + h_l2_index * ist->iste_size;
+
+		ret = kvm_read_guest_lock(kvm, g_entry_addr, &h_iste,
+					  sizeof(h_iste));
+		if (ret)
+			return ret;
+
+		/*
+		 * Sanitise the IST, clearing HWU & pending fields. Pending
+		 * state is later replayed via GIC VDPEND.
+		 */
+		ret = vgic_v5_process_iste(&h_iste, &ist->vmi->pending_irqs,
+					   h_l2_index, GICV5_HWIRQ_TYPE_LPI);
+		if (ret)
+			return ret;
+
+		/*
+		 * Zero the full ISTE (incl metadata), and write back the
+		 * non-metadata region, only.
+		 */
+		memset(h_iste_addr, 0, ist->iste_size);
+		WRITE_ONCE(*(__le32 *)h_iste_addr, h_iste);
+		vgic_v5_clean_inval(h_iste_addr, ist->iste_size);
+
+		g_entry_addr += sizeof(h_iste);
+	}
+
+	return 0;
+}
+
+/*
+ * Restore the LPI IST from guest memory to the Two-level host-allocated LPI
+ * IST.
+ *
+ * The guest LPI IST is restored from a linear GPA range.
+ *
+ * Only the lower 32-bits of each ISTE are restored.
+ *
+ * The host table is walked L1 entry by L1 entry, and within each L1 entry
+ * every L2 entry is filled from the next 32-bit value at @g_entry_addr. The
+ * LPI ID passed on for pending tracking is
+ * h_l1_index * shape.l2_entries + h_l2_index.
+ *
+ * Returns 0 on success, -ENXIO if an L1 entry is not valid or has no host L2
+ * table recorded, or the first error from kvm_read_guest_lock() or
+ * vgic_v5_process_iste(). Entries handled before a failure stay written and
+ * any interrupts already queued on pending_irqs are left on the list.
+ */
+static int vgic_v5_restore_two_level_lpi_ist(struct kvm *kvm,
+					     const struct vgic_v5_ist_desc *ist,
+					     gpa_t g_entry_addr)
+{
+	struct vgic_v5_two_level_ist_shape shape;
+	size_t h_l1_index, h_l2_index;
+	void *h_l2_ist_base;
+	__le32 h_iste;
+	int ret;
+
+	shape = vgic_v5_two_level_ist_shape(ist);
+
+	/*
+	 * NOTE(review): presumably done so that the L1 entries read below
+	 * reflect what is in memory rather than stale cache lines - confirm.
+	 */
+	vgic_v5_clean_inval(ist->vmi->h_lpi_ist,
+			    shape.l1_entries * sizeof(*ist->vmi->h_lpi_ist));
+
+	for (h_l1_index = 0; h_l1_index < shape.l1_entries; ++h_l1_index) {
+		u64 l1_iste;
+
+		/*
+		 * Host L2 ISTs are preallocated. Any invalid L1 entry means the
+		 * host IST state is inconsistent.
+		 */
+		l1_iste = le64_to_cpu(READ_ONCE(ist->vmi->h_lpi_ist[h_l1_index]));
+		if (!FIELD_GET(GICV5_ISTL1E_VALID, l1_iste))
+			return -ENXIO;
+
+		/* The L2 table is reached via the host-side pointer array. */
+		h_l2_ist_base = ist->vmi->h_lpi_l2_ists[h_l1_index];
+		if (!h_l2_ist_base)
+			return -ENXIO;
+
+		for (h_l2_index = 0; h_l2_index < shape.l2_entries; h_l2_index++) {
+			void *h_iste_addr = h_l2_ist_base +
+					    h_l2_index * ist->iste_size;
+
+			ret = kvm_read_guest_lock(kvm, g_entry_addr,
+						  &h_iste, sizeof(h_iste));
+			if (ret)
+				return ret;
+
+			/*
+			 * Sanitise the IST, clearing HWU & pending
+			 * fields. Pending state is later replayed via GIC
+			 * VDPEND.
+			 */
+			ret = vgic_v5_process_iste(&h_iste, &ist->vmi->pending_irqs,
+						   h_l1_index * shape.l2_entries + h_l2_index,
+						   GICV5_HWIRQ_TYPE_LPI);
+			if (ret)
+				return ret;
+
+			/*
+			 * Zero the full ISTE (incl metadata), and write back
+			 * the non-metadata region, only.
+			 */
+			memset(h_iste_addr, 0, ist->iste_size);
+			WRITE_ONCE(*(__le32 *)h_iste_addr, h_iste);
+			vgic_v5_clean_inval(h_iste_addr, ist->iste_size);
+
+			g_entry_addr += sizeof(h_iste);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the host-allocated LPI IST from guest memory.
+ *
+ * The guest side is always read as a linear GPA range that starts at the
+ * address held in ist_baser. The host side is either linear or two-level, and
+ * the matching restore helper is picked from h_lpi_ist_structure.
+ *
+ * Returns the descriptor lookup error if there is one, 0 if no LPI IST is
+ * present, and otherwise the result of the selected restore helper.
+ */
+int vgic_v5_restore_lpi_ist(struct kvm *kvm)
+{
+	struct vgic_v5_ist_desc desc;
+	gpa_t guest_addr;
+	int err;
+
+	err = vgic_v5_get_lpi_ist_desc(kvm, &desc);
+	if (err)
+		return err;
+
+	/* Nothing to restore when there is no LPI IST. */
+	if (!desc.present)
+		return 0;
+
+	/* The guest LPI IST is restored through its linear GPA range. */
+	guest_addr = kvm->arch.vgic.vgic_v5_irs_data->ist_baser.addr;
+
+	if (desc.vmi->h_lpi_ist_structure)
+		return vgic_v5_restore_two_level_lpi_ist(kvm, &desc, guest_addr);
+
+	return vgic_v5_restore_linear_lpi_ist(kvm, &desc, guest_addr);
+}
+
+/*
+ * Drain the list of interrupts that were pending at IST restore time.
+ *
+ * The per-VM info is looked up by VM ID and handed to
+ * vgic_v5_drain_pending_irqs() together with @inject, which says whether the
+ * drained interrupts are to be injected or merely dropped.
+ *
+ * Returns 0 on success, or -ENXIO (after a one-time warning) if no per-VM info
+ * is registered for this VM.
+ */
+static int vgic_v5_process_pending_irqs(struct kvm *kvm, bool inject)
+{
+	struct vgic_v5_vm_info *vmi = xa_load(&vm_info, vgic_v5_vm_id(kvm));
+
+	if (WARN_ON_ONCE(!vmi))
+		return -ENXIO;
+
+	vgic_v5_drain_pending_irqs(kvm, vmi, inject);
+	return 0;
+}
+
+/*
+ * Replay pending state that was cleared while restoring guest IST state.
+ *
+ * Drains the tracked pending interrupts with injection enabled. Returns the
+ * result of vgic_v5_process_pending_irqs().
+ */
+int vgic_v5_restore_pending_irqs(struct kvm *kvm)
+{
+	return vgic_v5_process_pending_irqs(kvm, true);
+}
+
+/*
+ * Drop pending state collected by a failed IST restore.
+ *
+ * Drains the tracked pending interrupts without injecting them. The result of
+ * vgic_v5_process_pending_irqs() is deliberately not propagated.
+ */
+void vgic_v5_discard_pending_irqs(struct kvm *kvm)
+{
+	vgic_v5_process_pending_irqs(kvm, false);
+}
diff --git a/arch/arm64/kvm/vgic/vgic-v5-tables.h b/arch/arm64/kvm/vgic/vgic-v5-tables.h
index 0ca0ae798dda6..ec54208e8825b 100644
--- a/arch/arm64/kvm/vgic/vgic-v5-tables.h
+++ b/arch/arm64/kvm/vgic/vgic-v5-tables.h
@@ -8,6 +8,7 @@
 
 #include <linux/idr.h>
 #include <linux/irqchip/arm-gic-v5.h>
+#include <linux/list.h>
 
 /* Level 1 Virtual Machine Table Entry */
 typedef __le64 vmtl1_entry;
@@ -43,6 +44,9 @@ struct vgic_v5_vm_info {
 	__le64			*h_lpi_ist;
 	__le64			**h_lpi_l2_ists;
 	__le64			*h_spi_ist;
+
+	/* Tracking of pending interrupts as part of IST restore */
+	struct list_head	pending_irqs;
 };
 
 struct vgic_v5_vmt {
@@ -95,7 +99,15 @@ int vgic_v5_vmte_alloc_vpe(struct kvm_vcpu *vcpu);
 int vgic_v5_vmte_free_vpe(struct kvm_vcpu *vcpu);
 
 int vgic_v5_spi_ist_allocate(struct kvm *kvm, unsigned int id_bits);
+int vgic_v5_lpi_ist_exists(struct kvm *kvm);
 int vgic_v5_lpi_ist_alloc(struct kvm *kvm, unsigned int id_bits);
 int vgic_v5_lpi_ist_free(struct kvm *kvm);
 
+int vgic_v5_save_spi_ist(struct kvm *kvm, struct kvm_device_attr *attr);
+int vgic_v5_save_lpi_ist(struct kvm *kvm);
+int vgic_v5_restore_spi_ist(struct kvm *kvm, struct kvm_device_attr *attr);
+int vgic_v5_restore_lpi_ist(struct kvm *kvm);
+int vgic_v5_restore_pending_irqs(struct kvm *kvm);
+void vgic_v5_discard_pending_irqs(struct kvm *kvm);
+
 #endif
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 05fd10030da84..f89028082529a 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -8,6 +8,7 @@
 #include <linux/bitops.h>
 #include <linux/irqchip/arm-vgic-info.h>
 #include <linux/irqdomain.h>
+#include <linux/kvm_host.h>
 
 #include "vgic.h"
 #include "vgic-v5-tables.h"
@@ -240,6 +241,17 @@ static int vgic_v5_irs_wait_for_vpe_op(void)
 					NULL);
 }
 
+/*
+ * Wait for a write to IRS_SAVE_VMR to complete.
+ *
+ * Polls IRS_SAVE_VM_STATUSR for the IDLE bit via gicv5_wait_for_op_atomic()
+ * and returns its result. @statusr is forwarded to that helper; callers in
+ * this file pass NULL when they do not need the status value.
+ */
+static int vgic_v5_irs_wait_for_save_vm_op(u32 *statusr)
+{
+	return gicv5_wait_for_op_atomic(irs_caps.irs_base,
+					GICV5_IRS_SAVE_VM_STATUSR,
+					GICV5_IRS_SAVE_VM_STATUSR_IDLE,
+					statusr);
+}
+
 static int vgic_v5_irs_write_vm_mmio_reg(u64 val, u32 offset)
 {
 	int ret;
@@ -401,6 +413,27 @@ static int vgic_v5_irs_set_up_vpe(u16 vm_id, u16 vpe_id,
 	return 0;
 }
 
+/*
+ * Issue an IRS_SAVE_VMR operation for @vm_id and wait for it to finish.
+ *
+ * The written value carries the VM ID, always sets the Q field, and sets the S
+ * field from @save. The whole sequence runs under global_irs_lock with
+ * interrupts disabled.
+ *
+ * Returns the error from either wait if one fails, otherwise 0. @statusr is
+ * handed to the final wait so the caller can inspect IRS_SAVE_VM_STATUSR.
+ */
+static int vgic_v5_irs_save_vm_op(u16 vm_id, bool save, u32 *statusr)
+{
+	u64 save_vmr;
+	int ret;
+
+	save_vmr = FIELD_PREP(GICV5_IRS_SAVE_VMR_VM_ID, vm_id);
+	save_vmr |= GICV5_IRS_SAVE_VMR_Q;
+	save_vmr |= FIELD_PREP(GICV5_IRS_SAVE_VMR_S, save);
+
+	/* Held until the function returns, on every path. */
+	guard(raw_spinlock_irqsave)(&global_irs_lock);
+
+	/* Make sure that we are idle to begin with. */
+	ret = vgic_v5_irs_wait_for_save_vm_op(NULL);
+	if (ret)
+		return ret;
+
+	irs_writeq_relaxed(save_vmr, GICV5_IRS_SAVE_VMR);
+
+	return vgic_v5_irs_wait_for_save_vm_op(statusr);
+}
+
 static irqreturn_t db_handler(int irq, void *data)
 {
 	struct kvm_vcpu *vcpu = data;
@@ -1212,6 +1245,46 @@ void vgic_v5_set_spi_ops(struct vgic_irq *irq)
 	irq->ops = &vgic_v5_spi_irq_ops;
 }
 
+/*
+ * Rebuild the global SPI AP list after restoring the IST. Pending state is
+ * replayed directly to the IRS, so read the restored hardware state back before
+ * deciding whether an SPI must be tracked by KVM.
+ *
+ * Every SPI in [0, nr_spis) is visited. An SPI without a vgic_irq triggers a
+ * one-time warning and is skipped.
+ */
+static void vgic_v5_restore_spi_ap_list(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	for (unsigned int i = 0; i < dist->nr_spis; i++) {
+		struct vgic_irq *irq = vgic_get_irq(kvm, vgic_v5_make_spi(i));
+		unsigned long flags;
+		bool pending;
+		u64 icsr;
+
+		if (WARN_ON_ONCE(!irq))
+			continue;
+
+		raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+		/* Query the interrupt's state; the result is decoded as ICSR. */
+		icsr = kvm_call_hyp_ret(__vgic_v5_vdrcfg, irq->intid);
+		irq->active = !!FIELD_GET(ICC_ICSR_EL1_Active, icsr);
+		pending = !!FIELD_GET(ICC_ICSR_EL1_Pending, icsr);
+
+		/* Edge: the latch simply follows the reported pending bit. */
+		if (irq->config == VGIC_CONFIG_EDGE)
+			irq->pending_latch = pending;
+
+		/* Level: only clear the latch when neither pending nor active. */
+		if (irq->config == VGIC_CONFIG_LEVEL &&
+		    !(pending || irq->active))
+			irq->pending_latch = false;
+
+		/*
+		 * The queue helper takes over irq_lock and flags; it is
+		 * expected to release the lock itself, as the other branch
+		 * does explicitly.
+		 */
+		if (irq->active || pending)
+			vgic_v5_spi_queue_irq_unlock(kvm, irq, flags);
+		else
+			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+		vgic_put_irq(kvm, irq);
+	}
+}
+
 /* Set the pending state for GICv5 SPIs and LPIs */
 void vgic_v5_set_irq_pend(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
 {
@@ -1353,3 +1426,216 @@ void vgic_v5_save_state(struct kvm_vcpu *vcpu)
 	__vgic_v5_save_ppi_state(cpu_if);
 	dsb(sy);
 }
+
+/*
+ * Translate an IRS_SAVE_VM_STATUSR value into an errno: 0 when the Q bit is
+ * set, -EBUSY otherwise.
+ */
+static int vgic_v5_irs_status_is_quiesced(u32 statusr)
+{
+	return (statusr & GICV5_IRS_SAVE_VM_STATUSR_Q) ? 0 : -EBUSY;
+}
+
+/*
+ * Issue a SAVE_VMR operation with the save flag clear for @vm_id and report
+ * whether the resulting status has the Q bit set.
+ *
+ * Returns the error from the operation itself, 0 if the status is quiesced,
+ * or -EBUSY if it is not.
+ */
+static int vgic_v5_irs_is_quiesced(u16 vm_id)
+{
+	u32 status;
+	int err = vgic_v5_irs_save_vm_op(vm_id, false, &status);
+
+	return err ? err : vgic_v5_irs_status_is_quiesced(status);
+}
+
+/*
+ * Save the SPI and LPI ISTs of a VM.
+ *
+ * Locks are taken in the order kvm->lock, all vCPUs (trylock), then
+ * kvm->arch.config_lock, and released in reverse on every exit path.
+ *
+ * The sequence is: issue the IRS save operation and check the returned status
+ * is quiesced, save the SPI IST to the userspace buffer described by @attr,
+ * re-check quiescence, save the LPI IST to guest memory, and re-check
+ * quiescence once more.
+ *
+ * Returns 0 on success, -EBUSY if the vCPUs cannot all be locked, the vgic is
+ * not initialised or the IRS does not report quiesced, or the error from the
+ * failing step.
+ */
+int vgic_v5_irs_save_ists(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = 0;
+	u32 statusr;
+	u16 vm_id = vgic_v5_vm_id(kvm);
+
+	mutex_lock(&kvm->lock);
+
+	if (kvm_trylock_all_vcpus(kvm)) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	mutex_lock(&kvm->arch.config_lock);
+
+	if (!vgic_initialized(kvm)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* Ask the IRS to save the VM state (save flag set). */
+	ret = vgic_v5_irs_save_vm_op(vm_id, true, &statusr);
+	if (ret) {
+		kvm_err("Failed to save GICv5 IRS VM state: %d\n", ret);
+		goto out_unlock;
+	}
+
+	ret = vgic_v5_irs_status_is_quiesced(statusr);
+	if (ret)
+		goto out_unlock;
+
+	/* Save the SPI IST to the userspace buffer. */
+	ret = vgic_v5_save_spi_ist(kvm, attr);
+	if (ret)
+		goto out_unlock;
+
+	/* Check the VM is still reported as quiesced after the SPI copy. */
+	ret = vgic_v5_irs_is_quiesced(vm_id);
+	if (ret)
+		goto out_unlock;
+
+	/* Save the LPI IST to guest memory. */
+	ret = vgic_v5_save_lpi_ist(kvm);
+	if (ret)
+		goto out_unlock;
+
+	/* Final quiescence check; its result is the function's result. */
+	ret = vgic_v5_irs_is_quiesced(vm_id);
+	if (ret)
+		goto out_unlock;
+
+out_unlock:
+	mutex_unlock(&kvm->arch.config_lock);
+	kvm_unlock_all_vcpus(kvm);
+	mutex_unlock(&kvm->lock);
+
+	return ret;
+}
+
+/*
+ * Allocate the host LPI IST for a restore, if one is needed.
+ *
+ * vgic_v5_irs_lpi_ist_id_bits() is asked for the number of LPI ID bits. A
+ * result that is zero or negative is returned unchanged without allocating
+ * anything; a positive result leads to vgic_v5_lpi_ist_alloc() being called
+ * with the reported id_bits.
+ *
+ * *allocated is set to true only if the allocation succeeded, and to false in
+ * every other case. Returns 0 on success or the first non-positive/erroneous
+ * result encountered.
+ */
+static int vgic_v5_restore_lpi_ist_alloc(struct kvm *kvm, bool *allocated)
+{
+	unsigned int nr_id_bits;
+	int err;
+
+	*allocated = false;
+
+	err = vgic_v5_irs_lpi_ist_id_bits(kvm, &nr_id_bits);
+	if (err <= 0)
+		return err;
+
+	err = vgic_v5_lpi_ist_alloc(kvm, nr_id_bits);
+	if (!err)
+		*allocated = true;
+
+	return err;
+}
+
+/*
+ * Clean up the LPI IST if we allocated it, and restore the VMTE to the
+ * original, valid state.
+ *
+ * When @lpi_ist_allocated is set, the VMTE is made invalid before the LPI IST
+ * is freed. In all cases the VMTE is made valid again at the end. Failures of
+ * the individual steps are reported through WARN_ON() only; nothing is
+ * returned to the caller.
+ */
+static void vgic_v5_restore_cleanup(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    bool lpi_ist_allocated)
+{
+	if (lpi_ist_allocated) {
+		WARN_ON(vgic_v5_send_command(vcpu, VMTE_MAKE_INVALID));
+		WARN_ON(vgic_v5_lpi_ist_free(kvm));
+	}
+
+	WARN_ON(vgic_v5_send_command(vcpu, VMTE_MAKE_VALID));
+}
+
+/*
+ * Restore the SPI and LPI ISTs of a VM.
+ *
+ * Locks are taken in the order kvm->lock, all vCPUs (trylock), then
+ * kvm->arch.config_lock, and released in reverse on every exit path.
+ *
+ * The restore is refused with -EBUSY if the vCPUs cannot all be locked, the
+ * vgic is not initialised, the VM has already run, or an LPI IST already
+ * exists. Otherwise the LPI IST is allocated if needed, the VMTE is made
+ * invalid, the SPI IST is restored from the userspace buffer described by
+ * @attr, the LPI IST is restored from guest memory, the VMTE is made valid
+ * again, and the interrupts that were pending in the restored tables are
+ * replayed before the SPI AP list is rebuilt.
+ *
+ * On failure after the VMTE was invalidated or the LPI IST was allocated, the
+ * collected pending interrupts are discarded and vgic_v5_restore_cleanup() is
+ * run.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int vgic_v5_irs_restore_ists(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	bool lpi_ist_allocated = false, vmte_invalid = false;
+	struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
+	int ret = 0;
+
+	mutex_lock(&kvm->lock);
+
+	if (kvm_trylock_all_vcpus(kvm)) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	mutex_lock(&kvm->arch.config_lock);
+
+	if (!vgic_initialized(kvm)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* Restoring is only allowed before the VM has run. */
+	if (kvm_vm_has_ran_once(kvm)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* A positive result means an LPI IST is already there: refuse. */
+	ret = vgic_v5_lpi_ist_exists(kvm);
+	if (ret) {
+		if (ret > 0)
+			ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/*
+	 * If the guest has previously allocated an IST (which we check based on
+	 * the IRS_IST_BASER), extract the number of LPI ID bits from the
+	 * IRS_IST_CFGR. Else, do nothing.
+	 *
+	 * We do this before making the VMTE invalid as we rely on
+	 * IRS_VMAP_VISTR to mark the IST as valid in the VMTE. This can only
+	 * happen while the VMTE is valid.
+	 */
+	ret = vgic_v5_restore_lpi_ist_alloc(kvm, &lpi_ist_allocated);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Host ISTs are updated while the VMTE is invalid, so the GIC cannot
+	 * observe partially restored state.
+	 */
+	ret = vgic_v5_send_command(vcpu0, VMTE_MAKE_INVALID);
+	if (ret) {
+		/*
+		 * If invalidation fails, the restore cannot safely update host
+		 * IST state.
+		 */
+		goto out_unlock;
+	}
+	vmte_invalid = true;
+
+	/* Restore the SPI IST from the userspace buffer. */
+	ret = vgic_v5_restore_spi_ist(kvm, attr);
+	if (ret)
+		goto out_unlock;
+
+	/* Restore the LPI IST from guest memory. */
+	if (lpi_ist_allocated) {
+		ret = vgic_v5_restore_lpi_ist(kvm);
+		if (ret)
+			goto out_unlock;
+	}
+
+	/* And make the VM Valid again */
+	ret = vgic_v5_send_command(vcpu0, VMTE_MAKE_VALID);
+	if (ret)
+		goto out_unlock;
+	vmte_invalid = false;
+
+	/*
+	 * As part of restoring the ISTs, any previously pending interrupts have
+	 * been tracked and made non-pending. Now that the ISTs have been
+	 * restored, and the VM is valid again, restore the pending interrupts.
+	 */
+	ret = vgic_v5_restore_pending_irqs(kvm);
+	if (ret)
+		goto out_unlock;
+
+	vgic_v5_restore_spi_ap_list(kvm);
+
+out_unlock:
+	/* Undo partial work only if something was actually changed. */
+	if (ret && (vmte_invalid || lpi_ist_allocated)) {
+		vgic_v5_discard_pending_irqs(kvm);
+		vgic_v5_restore_cleanup(kvm, vcpu0, lpi_ist_allocated);
+	}
+
+	mutex_unlock(&kvm->arch.config_lock);
+	kvm_unlock_all_vcpus(kvm);
+	mutex_unlock(&kvm->lock);
+
+	return ret;
+}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index e05b4a5c2e49b..9c140a54e840e 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -384,11 +384,14 @@ void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
 void vgic_v5_save_state(struct kvm_vcpu *vcpu);
 int vgic_v5_register_irs_iodev(struct kvm *kvm, gpa_t irs_base_address);
+int vgic_v5_irs_lpi_ist_id_bits(struct kvm *kvm, unsigned int *id_bits);
 
 int vgic_v5_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
 				struct kvm_device_attr *attr, bool is_write);
 int vgic_v5_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
 const struct sys_reg_desc *vgic_v5_get_sysreg_table(unsigned int *sz);
+int vgic_v5_irs_save_ists(struct kvm *kvm, struct kvm_device_attr *attr);
+int vgic_v5_irs_restore_ists(struct kvm *kvm, struct kvm_device_attr *attr);
 int vgic_v5_irs_attr_regs_access(struct kvm_device *dev,
 				 struct kvm_device_attr *attr,
 				 u64 *reg, bool is_write);
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 710a0d267347d..1b9bbeab18a4e 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -423,6 +423,7 @@ enum {
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
 #define KVM_DEV_ARM_VGIC_GRP_IRS_REGS	10
+#define KVM_DEV_ARM_VGIC_GRP_IST	11
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
-- 
2.34.1


^ permalink raw reply related

* [PATCH v2 33/39] KVM: arm64: gic-v5: Add GICv5 EL1 sysreg userspace accessors
From: Sascha Bischoff @ 2026-05-21 15:00 UTC (permalink / raw)
  To: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	kvm@vger.kernel.org
  Cc: nd, maz@kernel.org, oliver.upton@linux.dev, Joey Gouly,
	Suzuki Poulose, yuzenghui@huawei.com, peter.maydell@linaro.org,
	lpieralisi@kernel.org, Timothy Hayes
In-Reply-To: <20260521144846.1899475-1-sascha.bischoff@arm.com>

Now that KVM is at the point where it is able to run meaningful VMs
with GICv5, it is important to be able to save/restore the GICv5 state
in order to allow for VM migration.

Add functions to handle the set/get for GICv5 EL1 system registers to
facilitate the save/restore of these. These access the stored
hypervisor state for the guest, rather than the guest registers
themselves. Much of the state that is read out is generated at this
point as it is stored across a range of registers. When writing the
system registers, the state is merged back into the appropriate
places.

The save/restore accessors follow the existing GICv3 CPU sysreg UAPI
encoding, so the GICv5 device can reuse that interface once the device
attribute plumbing is enabled.

Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
 arch/arm64/kvm/Makefile               |   3 +-
 arch/arm64/kvm/sys_regs.c             |   6 +-
 arch/arm64/kvm/vgic-sys-reg-v5.c      | 519 ++++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic-kvm-device.c | 106 +++++-
 arch/arm64/kvm/vgic/vgic.h            |   7 +
 5 files changed, 633 insertions(+), 8 deletions(-)
 create mode 100644 arch/arm64/kvm/vgic-sys-reg-v5.c

diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 92dda57c08766..7aaeeb84e788e 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -24,7 +24,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
 	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
 	 vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \
-	 vgic/vgic-v5.o vgic/vgic-v5-tables.o vgic/vgic-irs-v5.o
+	 vgic/vgic-v5.o vgic/vgic-v5-tables.o vgic/vgic-irs-v5.o \
+	 vgic-sys-reg-v5.o
 
 kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
 kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6083a1b23dbf9..af0d8357003be 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -5831,7 +5831,7 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
 
 int __init kvm_sys_reg_table_init(void)
 {
-	const struct sys_reg_desc *gicv3_regs;
+	const struct sys_reg_desc *gicv3_regs, *gicv5_regs;
 	bool valid = true;
 	unsigned int i, sz;
 	int ret = 0;
@@ -5844,8 +5844,12 @@ int __init kvm_sys_reg_table_init(void)
 	valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), false);
 	valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
 
+	/* The GICv3 system registers... */
 	gicv3_regs = vgic_v3_get_sysreg_table(&sz);
 	valid &= check_sysreg_table(gicv3_regs, sz, false);
+	/* ...and the GICv5 system registers. */
+	gicv5_regs = vgic_v5_get_sysreg_table(&sz);
+	valid &= check_sysreg_table(gicv5_regs, sz, false);
 
 	if (!valid)
 		return -EINVAL;
diff --git a/arch/arm64/kvm/vgic-sys-reg-v5.c b/arch/arm64/kvm/vgic-sys-reg-v5.c
new file mode 100644
index 0000000000000..bbdc4f222c029
--- /dev/null
+++ b/arch/arm64/kvm/vgic-sys-reg-v5.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, 2026 Arm Ltd.
+ */
+
+/*
+ * VGICv5 system registers handling functions for AArch64 mode
+ */
+
+#include <linux/irqchip/arm-gic-v5.h>
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/wordpart.h>
+
+#include <asm/kvm_emulate.h>
+
+#include "vgic/vgic.h"
+#include "sys_regs.h"
+
+#define ICC_PPI_PRIORITYR_PRIORITY_MASK		REPEAT_BYTE(0x1f)
+
+/*
+ * Userspace write of ICC_APR_EL1: store @val in the saved APR with the bits
+ * covered by ICC_APR_EL1_RES0 dropped. Always returns 0.
+ */
+static int set_gic_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 val)
+{
+	/* The upper 32 bits are RES0 */
+	vcpu->arch.vgic_cpu.vgic_v5.vgic_apr = val & ~ICC_APR_EL1_RES0;
+
+	return 0;
+}
+
+/*
+ * Userspace read of ICC_APR_EL1: hand back the saved APR unchanged. Always
+ * returns 0.
+ */
+static int get_gic_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 *val)
+{
+	*val = vcpu->arch.vgic_cpu.vgic_v5.vgic_apr;
+
+	return 0;
+}
+
+/*
+ * Userspace write of ICC_CR0_EL1.
+ *
+ * Only the En bit is honoured. It is actually stored in the EN field of the
+ * saved VMCR, so the remaining bits of @val are ignored. Always returns 0.
+ */
+static int set_gic_cr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 val)
+{
+	struct vgic_v5_cpu_if *v5 = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 en = FIELD_GET(ICC_CR0_EL1_EN, val);
+
+	FIELD_MODIFY(FEAT_GCIE_ICH_VMCR_EL2_EN, &v5->vgic_vmcr, en);
+
+	return 0;
+}
+
+/*
+ * Userspace read of ICC_CR0_EL1.
+ *
+ * PID only applies if EL3 is present, and the same goes for IPPT, so both are
+ * always presented as 0. The link is always presented as connected and idle
+ * (LINK = 1, LINK_IDLE = 1). En is taken from the EN field of the saved VMCR.
+ * Always returns 0.
+ */
+static int get_gic_cr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 *val)
+{
+	struct vgic_v5_cpu_if *v5 = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 cr0 = ICC_CR0_EL1_LINK_MASK | ICC_CR0_EL1_LINK_IDLE_MASK;
+
+	cr0 |= FIELD_PREP(ICC_CR0_EL1_EN,
+			  FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, v5->vgic_vmcr));
+	*val = cr0;
+
+	return 0;
+}
+
+/*
+ * Userspace write of ICC_PCR_EL1: copy the PRIORITY field of @val into the
+ * VPMR field of the saved VMCR. Always returns 0.
+ */
+static int set_gic_pcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 val)
+{
+	struct vgic_v5_cpu_if *v5 = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 prio = FIELD_GET(ICC_PCR_EL1_PRIORITY, val);
+
+	FIELD_MODIFY(FEAT_GCIE_ICH_VMCR_EL2_VPMR, &v5->vgic_vmcr, prio);
+
+	return 0;
+}
+
+/*
+ * Userspace read of ICC_PCR_EL1: present the VPMR field of the saved VMCR in
+ * the PRIORITY field, with every other bit zero. Always returns 0.
+ */
+static int get_gic_pcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		       u64 *val)
+{
+	struct vgic_v5_cpu_if *v5 = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 vpmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, v5->vgic_vmcr);
+
+	*val = FIELD_PREP(ICC_PCR_EL1_PRIORITY, vpmr);
+
+	return 0;
+}
+
+/*
+ * Userspace write of ICC_ICSR_EL1: store @val in the saved ICSR with the bits
+ * covered by ICC_ICSR_EL1_RES0 dropped. Always returns 0.
+ */
+static int set_gic_icsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+			u64 val)
+{
+	vcpu->arch.vgic_cpu.vgic_v5.vgic_icsr = val & ~ICC_ICSR_EL1_RES0;
+
+	return 0;
+}
+
+/*
+ * Userspace read of ICC_ICSR_EL1: hand back the saved ICSR unchanged. Always
+ * returns 0.
+ */
+static int get_gic_icsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+			u64 *val)
+{
+	*val = vcpu->arch.vgic_cpu.vgic_v5.vgic_icsr;
+
+	return 0;
+}
+
+/*
+ * Helper macro to iterate over a range of PPIs and execute some code (to either
+ * extract or set the vgic_irq state). This is used when `get`-ing the PPI
+ * ENABLER, ACTIVER, PENDR and when setting the PRIORITYR state.
+ *
+ * vcpu: Pointer to struct kvm_vcpu (to which these PPIs belong)
+ * r: The register index. 0 or 1 for all except PRIORITYR (which is 0-15)
+ * nr: The number of PPIs iterated over. 64 for all but PRIORITYR (which is 8)
+ * code: The code snippet to execute for each vgic_irq
+ *
+ * The PPIs visited are r * nr + i for i in [0, nr). The snippet runs with
+ * irq->irq_lock held (interrupts disabled) and may refer to two names the
+ * macro defines: `i`, the index within the register, and `irq`, the current
+ * struct vgic_irq pointer. The reference taken with vgic_get_vcpu_irq() is
+ * dropped after the snippet has run.
+ *
+ * The result of vgic_get_vcpu_irq() is not checked, so callers must only pass
+ * register indices whose PPIs exist.
+ */
+#define for_ppi_state(vcpu, r, nr, code)				\
+	do {								\
+		struct kvm_vcpu *__vcpu = (vcpu);			\
+		int __r = (r);						\
+		int __nr = (nr);					\
+									\
+		for (int i = 0; i < __nr; i++) {			\
+			u32 id = vgic_v5_make_ppi(__r * __nr + i);	\
+			struct vgic_irq *irq;				\
+									\
+			irq = vgic_get_vcpu_irq(__vcpu, id);		\
+			scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) { \
+				code;					\
+			}						\
+			vgic_put_irq(__vcpu->kvm, irq);			\
+		}							\
+	} while (0)
+
+/*
+ * Userspace write of ICC_PPI_ENABLER<n>_EL1.
+ *
+ * r->Op2 % 2 selects the 64-bit register slice: slice 0 holds PPIs 0-63 and
+ * slice 1 holds PPIs 64-127. The value is stored unfiltered in
+ * vgic_ppi_enabler and the enabled flag of every vgic_irq in that slice is
+ * updated to match.
+ *
+ * Always returns 0.
+ */
+static int set_gic_ppi_enabler(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *r, u64 val)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	int i, start, end, reg = r->Op2 % 2;
+
+	/*
+	 * If we're only handling architected PPIs and the guest writes to the
+	 * enable for the non-architected PPIs, we just return as there's
+	 * nothing to do at all. We don't even allocate the storage for them in
+	 * this case.
+	 */
+	if (VGIC_V5_NR_PRIVATE_IRQS == 64 && reg == 1)
+		return 0;
+
+	/*
+	 * Merge the raw guest write into our bitmap at an offset of either 0 or
+	 * 64.
+	 *
+	 * Note that there is *NO* masking applied - the enable state is written
+	 * unfiltered. The assumption is that userspace uses this interface to
+	 * set initial state before the guest runs, and then the exposed PPI
+	 * mask is applied later, when vgic_v5_finalize_ppi_state() runs on
+	 * first entry to each vCPU. If userspace chooses to set the enabler
+	 * state later, it is fully capable of breaking the illusion we provided
+	 * to the guest by exposing register state (and PPIs) to the guest that
+	 * were not initially exposed. Good luck!
+	 */
+	bitmap_write(cpu_if->vgic_ppi_enabler, val, 64 * reg, 64);
+
+	/*
+	 * Sync the change in enable states to the vgic_irqs for the written
+	 * register slice. A register always covers exactly 64 PPIs, whatever
+	 * VGIC_V5_NR_PRIVATE_IRQS is, so the walk is bounded by the slice and
+	 * not by the total number of private IRQs.
+	 */
+	start = 64 * reg;
+	end = start + 64;
+	for (i = start; i < end; i++) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+			irq->enabled = test_bit(i, cpu_if->vgic_ppi_enabler);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	return 0;
+}
+
+/*
+ * Userspace read of ICC_PPI_ENABLER<n>_EL1.
+ *
+ * The value is assembled from the enabled flag of each vgic_irq in the 64-PPI
+ * slice selected by r->Op2 % 2. When only the architected PPIs are supported
+ * (VGIC_V5_NR_PRIVATE_IRQS == 64), the second register reads as 0.
+ *
+ * Always returns 0.
+ */
+static int get_gic_ppi_enabler(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *r, u64 *val)
+{
+	int reg = r->Op2 % 2;
+	unsigned long bits = 0;
+
+	/* Skip the walk for the register that has no PPIs behind it. */
+	if (VGIC_V5_NR_PRIVATE_IRQS != 64 || reg != 1)
+		for_ppi_state(vcpu, reg, 64,
+			      __assign_bit(i % 64, &bits, irq->enabled));
+
+	*val = bits;
+
+	return 0;
+}
+
+/*
+ * Userspace write of the PPI active state (ICC_PPI_SACTIVER<n>_EL1).
+ *
+ * r->Op2 % 2 selects the 64-bit register slice: slice 0 holds PPIs 0-63 and
+ * slice 1 holds PPIs 64-127. The value is stored unfiltered in
+ * vgic_ppi_activer and the active flag of every vgic_irq in that slice is
+ * updated to match.
+ *
+ * Always returns 0.
+ */
+static int set_gic_ppi_activer(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *r, u64 val)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	int i, start, end, reg = r->Op2 % 2;
+
+	/* Nothing is stored for the second register with 64 PPIs only. */
+	if (VGIC_V5_NR_PRIVATE_IRQS == 64 && reg == 1)
+		return 0;
+
+	/*
+	 * Store the raw guest write. The exposed PPI mask is applied later,
+	 * when vgic_v5_finalize_ppi_state() runs on first entry to each
+	 * vCPU. See comment on set_gic_ppi_enabler() for details.
+	 */
+	bitmap_write(cpu_if->vgic_ppi_activer, val, 64 * reg, 64);
+
+	/*
+	 * Sync the written slice to the vgic_irqs. A register always covers
+	 * exactly 64 PPIs, whatever VGIC_V5_NR_PRIVATE_IRQS is, so the walk is
+	 * bounded by the slice and not by the total number of private IRQs.
+	 */
+	start = 64 * reg;
+	end = start + 64;
+	for (i = start; i < end; i++) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+			irq->active = test_bit(i, cpu_if->vgic_ppi_activer);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	return 0;
+}
+
+/*
+ * Userspace read of the PPI active state.
+ *
+ * The value is assembled from the active flag of each vgic_irq in the 64-PPI
+ * slice selected by r->Op2 % 2. When only the architected PPIs are supported
+ * (VGIC_V5_NR_PRIVATE_IRQS == 64), the second register reads as 0.
+ *
+ * Always returns 0.
+ */
+static int get_gic_ppi_activer(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *r, u64 *val)
+{
+	int reg = r->Op2 % 2;
+	unsigned long bits = 0;
+
+	/* Skip the walk for the register that has no PPIs behind it. */
+	if (VGIC_V5_NR_PRIVATE_IRQS != 64 || reg != 1)
+		for_ppi_state(vcpu, reg, 64,
+			      __assign_bit(i % 64, &bits, irq->active));
+
+	*val = bits;
+
+	return 0;
+}
+
+/*
+ * Userspace write of the PPI pending state (PPI_PENDRx_EL1).
+ *
+ * r->Op2 % 2 selects the 64-bit register slice: slice 0 holds PPIs 0-63 and
+ * slice 1 holds PPIs 64-127. Bit n of @val describes PPI 64 * reg + n. For a
+ * level-configured PPI the bit is stored as line_level, for an edge-configured
+ * one as pending_latch.
+ *
+ * Always returns 0.
+ */
+static int set_gic_ppi_pendr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *r, u64 val)
+{
+	int i, start, end, reg = r->Op2 % 2;
+
+	/* If we only support architected PPIs, return */
+	if (VGIC_V5_NR_PRIVATE_IRQS == 64 && reg == 1)
+		return 0;
+
+	/*
+	 * Update each struct vgic_irq with the pending state, treating Level
+	 * and Edge interrupts differently. The exposed PPI mask is applied
+	 * later, when vgic_v5_finalize_ppi_state() runs on first entry to each
+	 * vCPU. See comment on set_gic_ppi_enabler() for details.
+	 *
+	 * A register always covers exactly 64 PPIs, whatever
+	 * VGIC_V5_NR_PRIVATE_IRQS is, so the walk is bounded by the slice and
+	 * not by the total number of private IRQs.
+	 */
+	start = 64 * reg;
+	end = start + 64;
+	for (i = start; i < end; i++) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
+			/* Index into val by the bit offset within the slice. */
+			bool level = !!(val & BIT_ULL(i - start));
+
+			if (irq->config == VGIC_CONFIG_LEVEL)
+				irq->line_level = level;
+			else
+				irq->pending_latch = level;
+		}
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	/*
+	 * The pending state is generated from the vgic_irqs on each guest
+	 * entry. Therefore, we don't store the raw value written anywhere in
+	 * the case of userspace PPI_PENDRx_EL1 writes.
+	 */
+
+	return 0;
+}
+
+/*
+ * Userspace read of the PPI pending state.
+ *
+ * The value is assembled by evaluating irq_is_pending() on each vgic_irq in
+ * the 64-PPI slice selected by r->Op2 % 2. When only the architected PPIs are
+ * supported (VGIC_V5_NR_PRIVATE_IRQS == 64), the second register reads as 0.
+ *
+ * Always returns 0.
+ */
+static int get_gic_ppi_pendr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *r, u64 *val)
+{
+	int reg = r->Op2 % 2;
+	unsigned long bits = 0;
+
+	/* Skip the walk for the register that has no PPIs behind it. */
+	if (VGIC_V5_NR_PRIVATE_IRQS != 64 || reg != 1)
+		for_ppi_state(vcpu, reg, 64,
+			      __assign_bit(i % 64, &bits, irq_is_pending(irq)));
+
+	*val = bits;
+
+	return 0;
+}
+
+/*
+ * Userspace write of a PPI priority register.
+ *
+ * The register index is built from the low bit of r->CRm (times 8) plus
+ * r->Op2. Each register carries eight one-byte priorities: byte i of @val is
+ * the priority of PPI reg * 8 + i. The value is masked with
+ * ICC_PPI_PRIORITYR_PRIORITY_MASK before use.
+ *
+ * When only the architected PPIs are supported (VGIC_V5_NR_PRIVATE_IRQS ==
+ * 64), writes to registers above index 7 are ignored.
+ *
+ * Always returns 0.
+ */
+static int set_gic_ppi_priorityr(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r, u64 val)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	int reg = ((r->CRm & 0x1) << 3) + r->Op2;
+
+	/* If we only support architected PPIs, return */
+	if (VGIC_V5_NR_PRIVATE_IRQS == 64 && reg > 7)
+		return 0;
+
+	val &= ICC_PPI_PRIORITYR_PRIORITY_MASK;
+
+	/*
+	 * Although priorities are not regularly synced back to the vgic_irq
+	 * state, they are explicitly synced back here. This is to ensure that
+	 * any pending PPIs are evaluated correctly when first running the guest
+	 * after setting the state.
+	 */
+	for_ppi_state(vcpu, reg, 8,
+		      irq->priority = (u8)(val >> (8 * i));
+		);
+
+	/*
+	 * Update the state that will be written to the ICH_PPI_PRIORITYRx_EL2
+	 * on next guest entry.
+	 */
+	cpu_if->vgic_ppi_priorityr[reg] = val;
+
+	return 0;
+}
+
+/*
+ * Userspace read of a PPI priority register.
+ *
+ * The register index is built from the low bit of r->CRm (times 8) plus
+ * r->Op2. When only the architected PPIs are supported
+ * (VGIC_V5_NR_PRIVATE_IRQS == 64), registers above index 7 read as 0.
+ *
+ * Always returns 0.
+ */
+static int get_gic_ppi_priorityr(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r, u64 *val)
+{
+	int reg = ((r->CRm & 0x1) << 3) + r->Op2;
+	bool unbacked = VGIC_V5_NR_PRIVATE_IRQS == 64 && reg > 7;
+
+	/*
+	 * The priorities are only synced back to the vgic_irq state when the
+	 * vcpu is entering WFI (KVM only needs to know the priorities when
+	 * evaluating if there are pending PPI interrupts for a vcpu). The raw
+	 * register ICH_PPI_PRIORITYRx_EL2 state is simply saved and restored
+	 * blindly. This state is just returned as it contains the most recent
+	 * priorities written by the guest.
+	 */
+	*val = unbacked ? 0 :
+	       vcpu->arch.vgic_cpu.vgic_v5.vgic_ppi_priorityr[reg];
+
+	return 0;
+}
+
+/*
+ * The following registers are NOT supported:
+ *
+ * - ICC_HAPR_EL1
+ *	The value of this is directly generated by the GICv5 hardware based on
+ *	the ICC_APR_EL1 when the guest is running.
+ * - ICC_IAFFIDR_EL1
+ *	The IAFFID for a GICv5 VPE is the same as the VPE ID, which is the index
+ *	into the in-memory VPE Table. This is not configurable, and instead we
+ *	rely on userspace recreating the VPEs in the same order prior to
+ *	restoring guest state.
+ * - ICC_PPI_CACTIVER<n>_EL1
+ *	Only raw state writes are supported via the S(et) variant.
+ * - ICC_PPI_CPENDR<n>_EL1
+ *	Only raw state writes are supported via the S(et) variant.
+ */
+static const struct sys_reg_desc gic_v5_icc_reg_descs[] = {
+	{ SYS_DESC(SYS_ICC_ICSR_EL1),
+	  .set_user = set_gic_icsr, .get_user = get_gic_icsr, },
+	{ SYS_DESC(SYS_ICC_PPI_ENABLER0_EL1),
+	  .set_user = set_gic_ppi_enabler, .get_user = get_gic_ppi_enabler, },
+	{ SYS_DESC(SYS_ICC_PPI_ENABLER1_EL1),
+	  .set_user = set_gic_ppi_enabler, .get_user = get_gic_ppi_enabler, },
+	/*
+	 * Only ICC_PPI_SACTIVER<n>_EL1 is exposed to the guest. This is treated
+	 * as a *RAW* write of register state for writes.
+	 */
+	{ SYS_DESC(SYS_ICC_PPI_SACTIVER0_EL1),
+	  .set_user = set_gic_ppi_activer, .get_user = get_gic_ppi_activer, },
+	{ SYS_DESC(SYS_ICC_PPI_SACTIVER1_EL1),
+	  .set_user = set_gic_ppi_activer, .get_user = get_gic_ppi_activer, },
+	/*
+	 * Only ICC_PPI_SPENDR<n>_EL1 is exposed to the guest. This is treated
+	 * as a *RAW* write of register state for writes.
+	 */
+	{ SYS_DESC(SYS_ICC_PPI_SPENDR0_EL1),
+	  .set_user = set_gic_ppi_pendr, .get_user = get_gic_ppi_pendr, },
+	{ SYS_DESC(SYS_ICC_PPI_SPENDR1_EL1),
+	  .set_user = set_gic_ppi_pendr, .get_user = get_gic_ppi_pendr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR0_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR1_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR2_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR3_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR4_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR5_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR6_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR7_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR8_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR9_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR10_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR11_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR12_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR13_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR14_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_PPI_PRIORITYR15_EL1),
+	  .set_user = set_gic_ppi_priorityr, .get_user = get_gic_ppi_priorityr, },
+	{ SYS_DESC(SYS_ICC_APR_EL1),
+	  .set_user = set_gic_apr, .get_user = get_gic_apr, },
+	{ SYS_DESC(SYS_ICC_CR0_EL1),
+	  .set_user = set_gic_cr0, .get_user = get_gic_cr0, },
+	{ SYS_DESC(SYS_ICC_PCR_EL1),
+	  .set_user = set_gic_pcr, .get_user = get_gic_pcr, },
+};
+
+const struct sys_reg_desc *vgic_v5_get_sysreg_table(unsigned int *sz)
+{
+	*sz = ARRAY_SIZE(gic_v5_icc_reg_descs);
+	return gic_v5_icc_reg_descs;
+}
+
+static u64 attr_to_id(u64 attr)
+{
+	return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr),
+			     FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP1_MASK, attr),
+			     FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRN_MASK, attr),
+			     FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRM_MASK, attr),
+			     FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP2_MASK, attr));
+}
+
+int vgic_v5_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+	const u64 id = attr_to_id(attr->attr);
+	const struct sys_reg_desc *desc;
+
+	desc = get_reg_by_id(id, gic_v5_icc_reg_descs,
+			     ARRAY_SIZE(gic_v5_icc_reg_descs));
+	/* Unknown or hidden registers are reported as absent. */
+	if (!desc || sysreg_hidden(vcpu, desc))
+		return -ENXIO;
+	return 0;
+}
+
+int vgic_v5_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
+				struct kvm_device_attr *attr,
+				bool is_write)
+{
+	const unsigned int nr = ARRAY_SIZE(gic_v5_icc_reg_descs);
+	struct kvm_one_reg one_reg = {
+		.id	= attr_to_id(attr->attr),
+		.addr	= attr->addr,
+	};
+
+	/* Reads and writes are both served from the same descriptor table. */
+	if (!is_write)
+		return kvm_sys_reg_get_user(vcpu, &one_reg,
+					    gic_v5_icc_reg_descs, nr);
+	return kvm_sys_reg_set_user(vcpu, &one_reg, gic_v5_icc_reg_descs, nr);
+}
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 2bf1930902b8e..075e4c1326754 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -542,7 +542,7 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
  * Allow access to certain ID-like registers prior to VGIC initialization,
  * thereby allowing the VMM to provision the features / sizing of the VGIC.
  */
-static bool reg_allowed_pre_init(struct kvm_device_attr *attr)
+static bool v3_reg_allowed_pre_init(struct kvm_device_attr *attr)
 {
 	if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS)
 		return false;
@@ -605,7 +605,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->arch.config_lock);
 
-	if (!(vgic_initialized(dev->kvm) || reg_allowed_pre_init(attr))) {
+	if (!(vgic_initialized(dev->kvm) || v3_reg_allowed_pre_init(attr))) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -773,6 +773,92 @@ static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
 	return ret;
 }
 
+int vgic_v5_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+		       struct vgic_reg_attr *reg_attr)
+{
+	unsigned long affinity;
+	unsigned long mpidr;
+
+	/* Only the CPU system register group is understood for now. */
+	if (attr->group != KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS)
+		return -EINVAL;
+
+	/* The target vcpu is looked up from the MPIDR field of the attribute. */
+	affinity = attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK;
+	affinity >>= KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT;
+	mpidr = VGIC_TO_MPIDR(affinity);
+
+	reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr);
+	if (!reg_attr->vcpu)
+		return -EINVAL;
+
+	/* Keep only the offset bits of the attribute. */
+	reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+
+	return 0;
+}
+
+/*
+ * Some registers can potentially be read before the core GIC & IRS have been
+ * initialised. Right now, everything is required to be post-init.
+ */
+static bool v5_reg_allowed_pre_init(struct kvm_device_attr *attr)
+{
+	return false;
+}
+
+/*
+ * vgic_v5_attr_regs_access - allows user space to access VGIC v5 state
+ * @dev:      kvm device handle
+ * @attr:     kvm device attribute
+ * @is_write: true if userspace is writing a register
+ * Return: 0 or -errno; -EBUSY if a vcpu is busy or the VGIC is not initialised
+ */
+static int vgic_v5_attr_regs_access(struct kvm_device *dev,
+				    struct kvm_device_attr *attr,
+				    bool is_write)
+{
+	struct vgic_reg_attr reg_attr;
+	struct kvm_vcpu *vcpu;
+	int ret;
+
+	ret = vgic_v5_parse_attr(dev, attr, &reg_attr);
+	if (ret)
+		return ret;
+
+	vcpu = reg_attr.vcpu;
+
+	mutex_lock(&dev->kvm->lock);
+
+	if (kvm_trylock_all_vcpus(dev->kvm)) {
+		mutex_unlock(&dev->kvm->lock);
+		return -EBUSY;
+	}
+
+	mutex_lock(&dev->kvm->arch.config_lock);
+
+	if (!(vgic_initialized(dev->kvm) || v5_reg_allowed_pre_init(attr))) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+		ret = vgic_v5_cpu_sysregs_uaccess(vcpu, attr, is_write);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+out:
+	mutex_unlock(&dev->kvm->arch.config_lock);
+	kvm_unlock_all_vcpus(dev->kvm);
+	mutex_unlock(&dev->kvm->lock);
+
+	return ret;
+}
+
 static int vgic_v5_set_attr(struct kvm_device *dev,
 			    struct kvm_device_attr *attr)
 {
@@ -780,7 +866,7 @@ static int vgic_v5_set_attr(struct kvm_device *dev,
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
-		return -ENXIO;
+		return vgic_v5_attr_regs_access(dev, attr, true);
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_CTRL:
@@ -806,7 +892,7 @@ static int vgic_v5_get_attr(struct kvm_device *dev,
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
-		return -ENXIO;
+		return vgic_v5_attr_regs_access(dev, attr, false);
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_CTRL:
@@ -836,8 +922,16 @@ static int vgic_v5_has_attr(struct kvm_device *dev,
 			return 0;
 		}
 		return -ENXIO;
-	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
-		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+		struct vgic_reg_attr reg_attr;
+		int ret;
+
+		ret = vgic_v5_parse_attr(dev, attr, &reg_attr);
+		if (ret)
+			return ret;
+
+		return vgic_v5_has_cpu_sysregs_attr(reg_attr.vcpu, attr);
+	}
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
 		return 0;
 	case KVM_DEV_ARM_VGIC_GRP_CTRL:
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index b5036170430dd..bcdac044a23f4 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -252,6 +252,8 @@ struct ap_list_summary {
 #define irqs_active_outside_lrs(s)		\
 	((s)->nr_act &&	irqs_outside_lrs(s))
 
+int vgic_v5_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+		       struct vgic_reg_attr *reg_attr);
 int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
 		       struct vgic_reg_attr *reg_attr);
 int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@@ -383,6 +385,11 @@ void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
 void vgic_v5_save_state(struct kvm_vcpu *vcpu);
 int vgic_v5_register_irs_iodev(struct kvm *kvm, gpa_t irs_base_address);
 
+int vgic_v5_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
+				struct kvm_device_attr *attr, bool is_write);
+int vgic_v5_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+const struct sys_reg_desc *vgic_v5_get_sysreg_table(unsigned int *sz);
+
 #define for_each_visible_v5_ppi(__i, __k)		\
 	for_each_set_bit(__i, (__k)->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS)
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH v1 15/15] arm64: dts: ti: k3-am62-verdin: Add Mezzanine with LG LP156WF1 LVDS panel
From: Vitor Soares @ 2026-05-21 15:00 UTC (permalink / raw)
  To: Laurent Pinchart, Neil Armstrong, Jessica Zhang,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
	Simona Vetter, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Nishanth Menon, Vignesh Raghavendra, Tero Kristo, Lad Prabhakar,
	Thierry Reding, Sam Ravnborg
  Cc: Vitor Soares, dri-devel, devicetree, linux-kernel,
	linux-arm-kernel
In-Reply-To: <20260521150038.103538-17-ivitro@gmail.com>

From: Vitor Soares <vitor.soares@toradex.com>

Add a device tree overlay enabling the LG LP156WF1 15.6" FHD (1920x1080)
dual-channel LVDS panel on the Verdin Development Board with Verdin AM62
Mezzanine expansion board. The panel connects via the AM62 OLDI0 and
OLDI1 in dual-channel mode on the Mezzanine LVDS interface (J10).

Assisted-by: Claude:claude-sonnet-4.6
Signed-off-by: Vitor Soares <vitor.soares@toradex.com>
---
 arch/arm64/boot/dts/ti/Makefile               |   5 +
 ...verdin-dev-mezzanine-lvds-lg-lp156wf1.dtso | 129 ++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 arch/arm64/boot/dts/ti/k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtso

diff --git a/arch/arm64/boot/dts/ti/Makefile b/arch/arm64/boot/dts/ti/Makefile
index 371f9a043fe5..e9951b5d2e0b 100644
--- a/arch/arm64/boot/dts/ti/Makefile
+++ b/arch/arm64/boot/dts/ti/Makefile
@@ -30,6 +30,7 @@ dtb-$(CONFIG_ARCH_K3) += k3-am625-phyboard-lyra-rdk.dtb
 dtb-$(CONFIG_ARCH_K3) += k3-am625-sk.dtb
 dtb-$(CONFIG_ARCH_K3) += k3-am625-tqma62xx-mba62xx.dtb
 dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-dev-mezzanine-can.dtbo
+dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtbo
 dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-dev-mezzanine-panel-cap-touch-10inch-lvds.dtbo
 dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-dev-nau8822-btl.dtbo
 dtb-$(CONFIG_ARCH_K3) += k3-am625-verdin-dsi-to-hdmi.dtbo
@@ -232,6 +233,9 @@ k3-am625-verdin-wifi-dev-dsi-to-lvds-panel-cap-touch-10inch-dtbs := \
 	k3-am625-verdin-dsi-to-lvds-panel-cap-touch-10inch.dtbo
 k3-am625-verdin-wifi-dev-mezzanine-can-dtbs := k3-am625-verdin-wifi-dev.dtb \
 	k3-am625-verdin-dev-mezzanine-can.dtbo
+k3-am625-verdin-wifi-dev-mezzanine-lvds-lg-lp156wf1-dtbs := \
+	k3-am625-verdin-wifi-dev.dtb \
+	k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtbo
 k3-am625-verdin-wifi-dev-mezzanine-panel-cap-touch-10inch-lvds-dtbs := \
 	k3-am625-verdin-wifi-dev.dtb \
 	k3-am625-verdin-dev-mezzanine-panel-cap-touch-10inch-lvds.dtbo
@@ -352,6 +356,7 @@ dtb- += k3-am625-beagleplay-csi2-ov5640.dtb \
 	k3-am625-sk-hdmi-audio.dtb \
 	k3-am625-verdin-wifi-dev-dsi-to-lvds-panel-cap-touch-10inch.dtb \
 	k3-am625-verdin-wifi-dev-mezzanine-can.dtb \
+	k3-am625-verdin-wifi-dev-mezzanine-lvds-lg-lp156wf1.dtb \
 	k3-am625-verdin-wifi-dev-mezzanine-panel-cap-touch-10inch-lvds.dtb \
 	k3-am625-verdin-wifi-dev-nau8822-btl.dtb \
 	k3-am625-verdin-wifi-dev-ov5640-24mhz.dtb \
diff --git a/arch/arm64/boot/dts/ti/k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtso b/arch/arm64/boot/dts/ti/k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtso
new file mode 100644
index 000000000000..a4d6cbe9ff3b
--- /dev/null
+++ b/arch/arm64/boot/dts/ti/k3-am625-verdin-dev-mezzanine-lvds-lg-lp156wf1.dtso
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+/*
+ * Copyright (c) Toradex
+ *
+ * LG LP156WF1 dual-channel LVDS panel on Verdin AM62 Mezzanine
+ * LVDS interface (J10), used with the Verdin Development Board.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/pwm/pwm.h>
+
+&{/} {
+	backlight_pwm2: backlight-pwm2 {
+		compatible = "pwm-backlight";
+		brightness-levels = <0 45 63 88 119 158 203 255>;
+		default-brightness-level = <4>;
+		/* Verdin GPIO_4 (SODIMM 212) - LVDS_BKL_EN */
+		enable-gpios = <&mcu_gpio0 4 GPIO_ACTIVE_HIGH>;
+		/* Verdin PWM_2 (SODIMM 16) - LVDS_PWM */
+		pwms = <&epwm0 1 6666667 PWM_POLARITY_INVERTED>;
+	};
+
+	panel-lvds-native {
+		compatible = "lg,lp156wf1", "panel-lvds";
+		backlight = <&backlight_pwm2>;
+		data-mapping = "jeida-24";
+		height-mm = <194>;
+		width-mm = <345>;
+
+		panel-timing {
+			clock-frequency = <138500000>;
+			hactive = <1920>;
+			hback-porch = <40>;
+			hfront-porch = <24>;
+			hsync-len = <16>;
+			pixelclk-active = <0>;
+			vactive = <1080>;
+			vback-porch = <23>;
+			vfront-porch = <3>;
+			vsync-len = <5>;
+		};
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				dual-lvds-odd-pixels;
+
+				panel_lvds_native_in0: endpoint {
+					remote-endpoint = <&oldi0_out>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+				dual-lvds-even-pixels;
+
+				panel_lvds_native_in1: endpoint {
+					remote-endpoint = <&oldi1_out>;
+				};
+			};
+		};
+	};
+};
+
+&dss {
+	status = "okay";
+};
+
+&dss_ports {
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	/* DSS VP1: internal DPI output to OLDIx */
+	port@0 {
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		dss0_out0: endpoint@0 {
+			reg = <0>;
+			remote-endpoint = <&oldi0_in>;
+		};
+
+		dss0_out1: endpoint@1 {
+			reg = <1>;
+			remote-endpoint = <&oldi1_in>;
+		};
+	};
+};
+
+&oldi0 {
+	ti,companion-oldi = <&oldi1>;
+	status = "okay";
+};
+
+&oldi0_port0 {
+	oldi0_in: endpoint {
+		remote-endpoint = <&dss0_out0>;
+	};
+};
+
+&oldi0_port1 {
+	oldi0_out: endpoint {
+		remote-endpoint = <&panel_lvds_native_in0>;
+	};
+};
+
+&oldi1 {
+	ti,secondary-oldi;
+	status = "okay";
+};
+
+&oldi1_port0 {
+	oldi1_in: endpoint {
+		remote-endpoint = <&dss0_out1>;
+	};
+};
+
+&oldi1_port1 {
+	oldi1_out: endpoint {
+		remote-endpoint = <&panel_lvds_native_in1>;
+	};
+};
-- 
2.54.0



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox