From: Andrew Jones <ajones@ventanamicro.com>
To: iommu@lists.linux.dev, kvm-riscv@lists.infradead.org,
kvm@vger.kernel.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org
Cc: jgg@nvidia.com, zong.li@sifive.com, tjeznach@rivosinc.com,
joro@8bytes.org, will@kernel.org, robin.murphy@arm.com,
anup@brainfault.org, atish.patra@linux.dev, tglx@linutronix.de,
alex.williamson@redhat.com, paul.walmsley@sifive.com,
palmer@dabbelt.com, alex@ghiti.fr
Subject: [RFC PATCH v2 05/18] iommu/riscv: Prepare to use MSI table
Date: Sat, 20 Sep 2025 15:38:55 -0500 [thread overview]
Message-ID: <20250920203851.2205115-25-ajones@ventanamicro.com> (raw)
In-Reply-To: <20250920203851.2205115-20-ajones@ventanamicro.com>
Capture the IMSIC layout from its config and reserve all the addresses.
Then use the IMSIC layout info to calculate the maximum number of PTEs
the MSI table needs to support and allocate the MSI table when attaching
a paging domain for the first time. Finally, at the same time, map the
IMSIC addresses in the stage1 DMA table when the stage1 DMA table is not
BARE. This ensures the stage1 translation doesn't fault, since the stage1
table translates the addresses before the MSI table does.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
drivers/iommu/riscv/iommu-ir.c | 186 +++++++++++++++++++++++++++++++++
drivers/iommu/riscv/iommu.c | 6 ++
drivers/iommu/riscv/iommu.h | 4 +
3 files changed, 196 insertions(+)
diff --git a/drivers/iommu/riscv/iommu-ir.c b/drivers/iommu/riscv/iommu-ir.c
index 08cf159b587d..bed104c5333c 100644
--- a/drivers/iommu/riscv/iommu-ir.c
+++ b/drivers/iommu/riscv/iommu-ir.c
@@ -4,11 +4,108 @@
*
* Copyright © 2025 Ventana Micro Systems Inc.
*/
+#include <linux/irqchip/riscv-imsic.h>
#include <linux/irqdomain.h>
#include <linux/msi.h>
+#include <linux/sizes.h>
+#include "../iommu-pages.h"
#include "iommu.h"
+static size_t riscv_iommu_ir_group_size(struct riscv_iommu_domain *domain)
+{
+ phys_addr_t mask = domain->msi_addr_mask;
+
+ if (domain->group_index_bits) {
+ phys_addr_t group_mask = BIT(domain->group_index_bits) - 1;
+ phys_addr_t group_shift = domain->group_index_shift - 12;
+
+ mask &= ~(group_mask << group_shift);
+ }
+
+ return (mask + 1) << 12;
+}
+
+static int riscv_iommu_ir_map_unmap_imsics(struct riscv_iommu_domain *domain, bool map,
+ gfp_t gfp, size_t *unmapped)
+{
+ phys_addr_t base = domain->msi_addr_pattern << 12, addr;
+ size_t stride = domain->imsic_stride, map_size = SZ_4K, size;
+ size_t i, j;
+
+ size = riscv_iommu_ir_group_size(domain);
+
+ if (stride == SZ_4K)
+ stride = map_size = size;
+
+ for (i = 0; i < BIT(domain->group_index_bits); i++) {
+ for (j = 0; j < size; j += stride) {
+ addr = (base + j) | (i << domain->group_index_shift);
+ if (map) {
+ int ret = iommu_map(&domain->domain, addr, addr, map_size,
+ IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO, gfp);
+ if (ret)
+ return ret;
+ } else {
+ *unmapped += iommu_unmap(&domain->domain, addr, map_size);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static size_t riscv_iommu_ir_unmap_imsics(struct riscv_iommu_domain *domain)
+{
+ size_t unmapped = 0;
+
+ riscv_iommu_ir_map_unmap_imsics(domain, false, 0, &unmapped);
+
+ return unmapped;
+}
+
+static int riscv_iommu_ir_map_imsics(struct riscv_iommu_domain *domain, gfp_t gfp)
+{
+ int ret;
+
+ ret = riscv_iommu_ir_map_unmap_imsics(domain, true, gfp, NULL);
+ if (ret)
+ riscv_iommu_ir_unmap_imsics(domain);
+
+ return ret;
+}
+
+static size_t riscv_iommu_ir_compute_msipte_idx(struct riscv_iommu_domain *domain,
+ phys_addr_t msi_pa)
+{
+ phys_addr_t mask = domain->msi_addr_mask;
+ phys_addr_t addr = msi_pa >> 12;
+ size_t idx;
+
+ if (domain->group_index_bits) {
+ phys_addr_t group_mask = BIT(domain->group_index_bits) - 1;
+ phys_addr_t group_shift = domain->group_index_shift - 12;
+ phys_addr_t group = (addr >> group_shift) & group_mask;
+
+ mask &= ~(group_mask << group_shift);
+ idx = addr & mask;
+ idx |= group << fls64(mask);
+ } else {
+ idx = addr & mask;
+ }
+
+ return idx;
+}
+
+static size_t riscv_iommu_ir_nr_msiptes(struct riscv_iommu_domain *domain)
+{
+ phys_addr_t base = domain->msi_addr_pattern << 12;
+ phys_addr_t max_addr = base | (domain->msi_addr_mask << 12);
+ size_t max_idx = riscv_iommu_ir_compute_msipte_idx(domain, max_addr);
+
+ return max_idx + 1;
+}
+
static struct irq_chip riscv_iommu_ir_irq_chip = {
.name = "IOMMU-IR",
.irq_ack = irq_chip_ack_parent,
@@ -90,25 +187,114 @@ struct irq_domain *riscv_iommu_ir_irq_domain_create(struct riscv_iommu_device *i
return irqdomain;
}
+static void riscv_iommu_ir_free_msi_table(struct riscv_iommu_domain *domain)
+{
+ iommu_free_pages(domain->msi_root);
+}
+
void riscv_iommu_ir_irq_domain_remove(struct riscv_iommu_info *info)
{
+ struct riscv_iommu_domain *domain = info->domain;
struct fwnode_handle *fn;
if (!info->irqdomain)
return;
+ riscv_iommu_ir_free_msi_table(domain);
+
fn = info->irqdomain->fwnode;
irq_domain_remove(info->irqdomain);
info->irqdomain = NULL;
irq_domain_free_fwnode(fn);
}
+static int riscv_ir_set_imsic_global_config(struct riscv_iommu_device *iommu,
+ struct riscv_iommu_domain *domain)
+{
+ const struct imsic_global_config *imsic_global;
+ u64 mask = 0;
+
+ imsic_global = imsic_get_global_config();
+
+ mask |= (BIT(imsic_global->group_index_bits) - 1) << (imsic_global->group_index_shift - 12);
+ mask |= BIT(imsic_global->hart_index_bits + imsic_global->guest_index_bits) - 1;
+ domain->msi_addr_mask = mask;
+ domain->msi_addr_pattern = imsic_global->base_addr >> 12;
+ domain->group_index_bits = imsic_global->group_index_bits;
+ domain->group_index_shift = imsic_global->group_index_shift;
+ domain->imsic_stride = BIT(imsic_global->guest_index_bits + 12);
+
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_MSI_FLAT) {
+ size_t nr_ptes = riscv_iommu_ir_nr_msiptes(domain);
+
+ domain->msi_root = iommu_alloc_pages_node_sz(domain->numa_node, GFP_KERNEL_ACCOUNT,
+ nr_ptes * sizeof(*domain->msi_root));
+ if (!domain->msi_root)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int riscv_iommu_ir_attach_paging_domain(struct riscv_iommu_domain *domain,
struct device *dev)
{
+ struct riscv_iommu_device *iommu = dev_to_iommu(dev);
+ struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
+ int ret;
+
+ if (!info->irqdomain)
+ return 0;
+
+ /*
+ * Do the domain's one-time setup of the msi configuration the
+ * first time the domain is attached and the msis are enabled.
+ */
+ if (domain->msi_addr_mask == 0) {
+ ret = riscv_ir_set_imsic_global_config(iommu, domain);
+ if (ret)
+ return ret;
+
+ /*
+ * The RISC-V IOMMU MSI table is checked after the stage1 DMA
+ * page tables. If we don't create identity mappings in the
+ * stage1 table then we'll fault and won't even get a chance
+ * to check the MSI table.
+ */
+ if (domain->pgd_mode) {
+ ret = riscv_iommu_ir_map_imsics(domain, GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ riscv_iommu_ir_free_msi_table(domain);
+ return ret;
+ }
+ }
+ }
+
return 0;
}
void riscv_iommu_ir_free_paging_domain(struct riscv_iommu_domain *domain)
{
+ riscv_iommu_ir_free_msi_table(domain);
+}
+
+void riscv_iommu_ir_get_resv_regions(struct device *dev, struct list_head *head)
+{
+ const struct imsic_global_config *imsic_global;
+ struct iommu_resv_region *reg;
+ phys_addr_t addr;
+ size_t size, i;
+
+ imsic_global = imsic_get_global_config();
+ if (!imsic_global || !imsic_global->nr_ids)
+ return;
+
+ size = BIT(imsic_global->hart_index_bits + imsic_global->guest_index_bits + 12);
+
+ for (i = 0; i < BIT(imsic_global->group_index_bits); i++) {
+ addr = imsic_global->base_addr | (i << imsic_global->group_index_shift);
+ reg = iommu_alloc_resv_region(addr, size, 0, IOMMU_RESV_MSI, GFP_KERNEL);
+ if (reg)
+ list_add_tail(&reg->list, head);
+ }
}
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index db2acd9dc64b..0ba6504d4f33 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1423,6 +1423,11 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
return &domain->domain;
}
+static void riscv_iommu_get_resv_regions(struct device *dev, struct list_head *head)
+{
+ riscv_iommu_ir_get_resv_regions(dev, head);
+}
+
static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain,
struct device *dev)
{
@@ -1561,6 +1566,7 @@ static const struct iommu_ops riscv_iommu_ops = {
.blocked_domain = &riscv_iommu_blocking_domain,
.release_domain = &riscv_iommu_blocking_domain,
.domain_alloc_paging = riscv_iommu_alloc_paging_domain,
+ .get_resv_regions = riscv_iommu_get_resv_regions,
.device_group = riscv_iommu_device_group,
.probe_device = riscv_iommu_probe_device,
.release_device = riscv_iommu_release_device,
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 640d825f11b9..dc2020b81bbc 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -30,6 +30,9 @@ struct riscv_iommu_domain {
struct riscv_iommu_msipte *msi_root;
u64 msi_addr_mask;
u64 msi_addr_pattern;
+ u32 group_index_bits;
+ u32 group_index_shift;
+ size_t imsic_stride;
};
/* Private IOMMU data for managed devices, dev_iommu_priv_* */
@@ -97,6 +100,7 @@ void riscv_iommu_ir_irq_domain_remove(struct riscv_iommu_info *info);
int riscv_iommu_ir_attach_paging_domain(struct riscv_iommu_domain *domain,
struct device *dev);
void riscv_iommu_ir_free_paging_domain(struct riscv_iommu_domain *domain);
+void riscv_iommu_ir_get_resv_regions(struct device *dev, struct list_head *head);
#define riscv_iommu_readl(iommu, addr) \
readl_relaxed((iommu)->reg + (addr))
--
2.49.0
next prev parent reply other threads:[~2025-09-20 20:39 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-20 20:38 [RFC PATCH v2 00/18] iommu/riscv: Add irqbypass support Andrew Jones
2025-09-20 20:38 ` [RFC PATCH v2 01/18] genirq/msi: Provide DOMAIN_BUS_MSI_REMAP Andrew Jones
2025-09-30 8:25 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 02/18] iommu/riscv: Move struct riscv_iommu_domain and info to iommu.h Andrew Jones
2025-09-30 8:26 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 03/18] iommu/riscv: Use data structure instead of individual values Andrew Jones
2025-09-24 3:25 ` Nutty.Liu
2025-09-24 13:31 ` Andrew Jones
2025-09-20 20:38 ` [RFC PATCH v2 04/18] iommu/riscv: Add IRQ domain for interrupt remapping Andrew Jones
2025-09-28 9:30 ` Nutty.Liu
2025-09-29 15:50 ` Andrew Jones
2025-09-20 20:38 ` Andrew Jones [this message]
2025-10-05 8:30 ` [RFC PATCH v2 05/18] iommu/riscv: Prepare to use MSI table Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 06/18] iommu/riscv: Implement MSI table management functions Andrew Jones
2025-10-05 8:28 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 07/18] iommu/riscv: Export phys_to_ppn and ppn_to_phys Andrew Jones
2025-10-05 8:39 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 08/18] iommu/riscv: Use MSI table to enable IMSIC access Andrew Jones
2025-09-22 18:43 ` Jason Gunthorpe
2025-09-22 21:20 ` Andrew Jones
2025-09-22 23:56 ` Jason Gunthorpe
2025-09-23 10:12 ` Thomas Gleixner
2025-09-23 14:06 ` Jason Gunthorpe
2025-09-23 15:12 ` Andrew Jones
2025-09-23 15:27 ` Jason Gunthorpe
2025-09-23 15:50 ` Andrew Jones
2025-09-23 16:23 ` Jason Gunthorpe
2025-09-23 16:33 ` Andrew Jones
2026-03-24 9:12 ` Vincent Chen
2026-03-26 17:31 ` Andrew Jones
2025-09-23 14:37 ` Andrew Jones
2025-09-23 14:52 ` Jason Gunthorpe
2025-09-23 15:37 ` Andrew Jones
2025-10-23 13:47 ` Jinvas
2025-09-20 20:38 ` [RFC PATCH v2 09/18] iommu/dma: enable IOMMU_DMA for RISC-V Andrew Jones
2025-10-05 8:40 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 10/18] RISC-V: Define irqbypass vcpu_info Andrew Jones
2025-10-05 8:41 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 11/18] iommu/riscv: Maintain each irq msitbl index with chip data Andrew Jones
2025-09-20 20:39 ` [RFC PATCH v2 12/18] iommu/riscv: Add guest file irqbypass support Andrew Jones
2025-09-20 20:39 ` [RFC PATCH v2 13/18] iommu/riscv: report iommu capabilities Andrew Jones
2025-10-05 8:43 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 14/18] RISC-V: KVM: Enable KVM_VFIO interfaces on RISC-V arch Andrew Jones
2025-10-05 8:44 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 15/18] RISC-V: KVM: Add guest file irqbypass support Andrew Jones
2025-09-20 20:39 ` [RFC PATCH v2 16/18] vfio: enable IOMMU_TYPE1 for RISC-V Andrew Jones
2025-10-05 8:44 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 17/18] RISC-V: defconfig: Add VFIO modules Andrew Jones
2025-10-05 8:47 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 18/18] DO NOT UPSTREAM: RISC-V: KVM: Workaround kvm_riscv_gstage_ioremap() bug Andrew Jones
2025-10-20 13:12 ` fangyu.yu
2025-10-20 19:47 ` Daniel Henrique Barboza
2025-10-21 1:10 ` fangyu.yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250920203851.2205115-25-ajones@ventanamicro.com \
--to=ajones@ventanamicro.com \
--cc=alex.williamson@redhat.com \
--cc=alex@ghiti.fr \
--cc=anup@brainfault.org \
--cc=atish.patra@linux.dev \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=joro@8bytes.org \
--cc=kvm-riscv@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=robin.murphy@arm.com \
--cc=tglx@linutronix.de \
--cc=tjeznach@rivosinc.com \
--cc=will@kernel.org \
--cc=zong.li@sifive.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox