From: Andrew Jones <ajones@ventanamicro.com>
To: iommu@lists.linux.dev, kvm-riscv@lists.infradead.org,
kvm@vger.kernel.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org
Cc: jgg@nvidia.com, zong.li@sifive.com, tjeznach@rivosinc.com,
joro@8bytes.org, will@kernel.org, robin.murphy@arm.com,
anup@brainfault.org, atish.patra@linux.dev, tglx@linutronix.de,
alex.williamson@redhat.com, paul.walmsley@sifive.com,
palmer@dabbelt.com, alex@ghiti.fr
Subject: [RFC PATCH v2 12/18] iommu/riscv: Add guest file irqbypass support
Date: Sat, 20 Sep 2025 15:39:02 -0500 [thread overview]
Message-ID: <20250920203851.2205115-32-ajones@ventanamicro.com> (raw)
In-Reply-To: <20250920203851.2205115-20-ajones@ventanamicro.com>
Implement irq_set_vcpu_affinity() in the RISC-V IOMMU driver.
irq_set_vcpu_affinity() is the channel from a hypervisor to the
IOMMU needed to ensure that assigned devices which direct MSIs to
guest IMSIC addresses will have those MSI writes redirected to
their corresponding guest interrupt files.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
drivers/iommu/riscv/iommu-ir.c | 165 ++++++++++++++++++++++++++++++++-
drivers/iommu/riscv/iommu.c | 5 +-
drivers/iommu/riscv/iommu.h | 4 +
3 files changed, 171 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/riscv/iommu-ir.c b/drivers/iommu/riscv/iommu-ir.c
index 059671f18267..48f424ce1a8d 100644
--- a/drivers/iommu/riscv/iommu-ir.c
+++ b/drivers/iommu/riscv/iommu-ir.c
@@ -10,6 +10,8 @@
#include <linux/msi.h>
#include <linux/sizes.h>
+#include <asm/irq.h>
+
#include "../iommu-pages.h"
#include "iommu.h"
@@ -164,6 +166,48 @@ static void riscv_iommu_ir_msitbl_inval(struct riscv_iommu_domain *domain,
rcu_read_unlock();
}
+static void riscv_iommu_ir_msitbl_clear(struct riscv_iommu_domain *domain)
+{
+ for (size_t i = 0; i < riscv_iommu_ir_nr_msiptes(domain); i++) {
+ riscv_iommu_ir_clear_pte(&domain->msi_root[i]);
+ refcount_set(&domain->msi_pte_counts[i], 0);
+ }
+}
+
+static void riscv_iommu_ir_msiptp_update(struct riscv_iommu_domain *domain)
+{
+ struct riscv_iommu_bond *bond;
+ struct riscv_iommu_device *iommu, *prev;
+ struct riscv_iommu_dc new_dc = {
+ .ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
+ RISCV_IOMMU_PC_TA_V,
+ .fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
+ FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root)),
+ .msiptp = virt_to_pfn(domain->msi_root) |
+ FIELD_PREP(RISCV_IOMMU_DC_MSIPTP_MODE,
+ RISCV_IOMMU_DC_MSIPTP_MODE_FLAT),
+ .msi_addr_mask = domain->msi_addr_mask,
+ .msi_addr_pattern = domain->msi_addr_pattern,
+ };
+
+ /* Like riscv_iommu_ir_msitbl_inval(), synchronize with riscv_iommu_bond_link() */
+ smp_mb();
+
+ rcu_read_lock();
+
+ prev = NULL;
+ list_for_each_entry_rcu(bond, &domain->bonds, list) {
+ iommu = dev_to_iommu(bond->dev);
+ if (iommu == prev)
+ continue;
+
+ riscv_iommu_iodir_update(iommu, bond->dev, &new_dc);
+ prev = iommu;
+ }
+
+ rcu_read_unlock();
+}
+
struct riscv_iommu_ir_chip_data {
size_t idx;
u32 config;
@@ -279,12 +323,127 @@ static int riscv_iommu_ir_irq_set_affinity(struct irq_data *data,
return ret;
}
+static bool riscv_iommu_ir_vcpu_check_config(struct riscv_iommu_domain *domain,
+ struct riscv_iommu_ir_vcpu_info *vcpu_info)
+{
+ return domain->msi_addr_mask == vcpu_info->msi_addr_mask &&
+ domain->msi_addr_pattern == vcpu_info->msi_addr_pattern &&
+ domain->group_index_bits == vcpu_info->group_index_bits &&
+ domain->group_index_shift == vcpu_info->group_index_shift;
+}
+
+static int riscv_iommu_ir_vcpu_new_config(struct riscv_iommu_domain *domain,
+ struct irq_data *data,
+ struct riscv_iommu_ir_vcpu_info *vcpu_info)
+{
+ struct riscv_iommu_msipte *pte;
+ size_t idx;
+ int ret;
+
+ if (domain->pgd_mode)
+ riscv_iommu_ir_unmap_imsics(domain);
+
+ riscv_iommu_ir_msitbl_clear(domain);
+
+ domain->msi_addr_mask = vcpu_info->msi_addr_mask;
+ domain->msi_addr_pattern = vcpu_info->msi_addr_pattern;
+ domain->group_index_bits = vcpu_info->group_index_bits;
+ domain->group_index_shift = vcpu_info->group_index_shift;
+ domain->imsic_stride = SZ_4K;
+ domain->msitbl_config += 1;
+
+ if (domain->pgd_mode) {
+ /*
+ * As in riscv_iommu_ir_irq_domain_create(), we do all stage1
+ * mappings up front since the MSI table will manage the
+ * translations.
+ *
+ * XXX: Since irq-set-vcpu-affinity is called in atomic context
+ * we need GFP_ATOMIC. If the number of 4K dma pte allocations
+ * is considered too many for GFP_ATOMIC, then we can wrap
+ * riscv_iommu_pte_alloc()'s iommu_alloc_pages_node_sz() call
+ * in a mempool and try to ensure the pool has enough elements
+ * in riscv_iommu_ir_irq_domain_enable_msis().
+ */
+ ret = riscv_iommu_ir_map_imsics(domain, GFP_ATOMIC);
+ if (ret)
+ return ret;
+ }
+
+ idx = riscv_iommu_ir_compute_msipte_idx(domain, vcpu_info->gpa);
+ pte = &domain->msi_root[idx];
+ riscv_iommu_ir_irq_set_msitbl_info(data, idx, domain->msitbl_config);
+ riscv_iommu_ir_set_pte(pte, vcpu_info->hpa);
+ riscv_iommu_ir_msitbl_inval(domain, NULL);
+ refcount_set(&domain->msi_pte_counts[idx], 1);
+
+ riscv_iommu_ir_msiptp_update(domain);
+
+ return 0;
+}
+
+static int riscv_iommu_ir_irq_set_vcpu_affinity(struct irq_data *data, void *arg)
+{
+ struct riscv_iommu_info *info = data->domain->host_data;
+ struct riscv_iommu_domain *domain = info->domain;
+ struct riscv_iommu_ir_vcpu_info *vcpu_info = arg;
+ struct riscv_iommu_msipte pteval;
+ struct riscv_iommu_msipte *pte;
+ bool inc = false, dec = false;
+ size_t old_idx, new_idx;
+ u32 old_config;
+
+ if (!domain->msi_root)
+ return -EOPNOTSUPP;
+
+ old_idx = riscv_iommu_ir_irq_msitbl_idx(data);
+ old_config = riscv_iommu_ir_irq_msitbl_config(data);
+
+ if (!vcpu_info) {
+ riscv_iommu_ir_msitbl_unmap(domain, data, old_idx);
+ return 0;
+ }
+
+ guard(raw_spinlock)(&domain->msi_lock);
+
+ if (!riscv_iommu_ir_vcpu_check_config(domain, vcpu_info))
+ return riscv_iommu_ir_vcpu_new_config(domain, data, vcpu_info);
+
+ new_idx = riscv_iommu_ir_compute_msipte_idx(domain, vcpu_info->gpa);
+ riscv_iommu_ir_irq_set_msitbl_info(data, new_idx, domain->msitbl_config);
+
+ pte = &domain->msi_root[new_idx];
+ riscv_iommu_ir_set_pte(&pteval, vcpu_info->hpa);
+
+ if (pteval.pte != pte->pte) {
+ *pte = pteval;
+ riscv_iommu_ir_msitbl_inval(domain, pte);
+ }
+
+ if (old_config != domain->msitbl_config)
+ inc = true;
+ else if (new_idx != old_idx)
+ inc = dec = true;
+
+ if (dec && refcount_dec_and_test(&domain->msi_pte_counts[old_idx])) {
+ pte = &domain->msi_root[old_idx];
+ riscv_iommu_ir_clear_pte(pte);
+ riscv_iommu_ir_msitbl_inval(domain, pte);
+ }
+
+ if (inc && !refcount_inc_not_zero(&domain->msi_pte_counts[new_idx]))
+ refcount_set(&domain->msi_pte_counts[new_idx], 1);
+
+ return 0;
+}
+
static struct irq_chip riscv_iommu_ir_irq_chip = {
.name = "IOMMU-IR",
.irq_ack = irq_chip_ack_parent,
.irq_mask = irq_chip_mask_parent,
.irq_unmask = irq_chip_unmask_parent,
.irq_set_affinity = riscv_iommu_ir_irq_set_affinity,
+ .irq_set_vcpu_affinity = riscv_iommu_ir_irq_set_vcpu_affinity,
};
static int riscv_iommu_ir_irq_domain_alloc_irqs(struct irq_domain *irqdomain,
@@ -334,7 +493,11 @@ static void riscv_iommu_ir_irq_domain_free_irqs(struct irq_domain *irqdomain,
config = riscv_iommu_ir_irq_msitbl_config(data);
/*
* Only irqs with matching config versions need to be unmapped here
- * since config changes will unmap everything.
+ * since config changes will unmap everything and irq-set-vcpu-affinity
+ * irq deletions unmap at deletion time. An example of stale indices that
+ * don't need to be unmapped are those of irqs allocated by VFIO that a
+ * guest driver never used. The config change made for the guest will have
+ * already unmapped those, though, so there's no need to unmap them here.
*/
if (config == domain->msitbl_config) {
idx = riscv_iommu_ir_irq_msitbl_idx(data);
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 440c3eb6f15a..02f38aa0b231 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -957,8 +957,9 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
* device is not quiesced might be disruptive, potentially causing
* interim translation faults.
*/
-static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
- struct device *dev, struct riscv_iommu_dc *new_dc)
+void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
+ struct device *dev,
+ struct riscv_iommu_dc *new_dc)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct riscv_iommu_dc *dc;
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 130f82e8392a..5ab2b4d6ee88 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -124,6 +124,10 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu);
void riscv_iommu_remove(struct riscv_iommu_device *iommu);
void riscv_iommu_disable(struct riscv_iommu_device *iommu);
+void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
+ struct device *dev,
+ struct riscv_iommu_dc *new_dc);
+
void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu,
struct riscv_iommu_command *cmd);
void riscv_iommu_cmd_sync(struct riscv_iommu_device *iommu, unsigned int timeout_us);
--
2.49.0
next prev parent reply other threads:[~2025-09-20 20:39 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-20 20:38 [RFC PATCH v2 00/18] iommu/riscv: Add irqbypass support Andrew Jones
2025-09-20 20:38 ` [RFC PATCH v2 01/18] genirq/msi: Provide DOMAIN_BUS_MSI_REMAP Andrew Jones
2025-09-30 8:25 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 02/18] iommu/riscv: Move struct riscv_iommu_domain and info to iommu.h Andrew Jones
2025-09-30 8:26 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 03/18] iommu/riscv: Use data structure instead of individual values Andrew Jones
2025-09-24 3:25 ` Nutty.Liu
2025-09-24 13:31 ` Andrew Jones
2025-09-20 20:38 ` [RFC PATCH v2 04/18] iommu/riscv: Add IRQ domain for interrupt remapping Andrew Jones
2025-09-28 9:30 ` Nutty.Liu
2025-09-29 15:50 ` Andrew Jones
2025-09-20 20:38 ` [RFC PATCH v2 05/18] iommu/riscv: Prepare to use MSI table Andrew Jones
2025-10-05 8:30 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 06/18] iommu/riscv: Implement MSI table management functions Andrew Jones
2025-10-05 8:28 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 07/18] iommu/riscv: Export phys_to_ppn and ppn_to_phys Andrew Jones
2025-10-05 8:39 ` Nutty.Liu
2025-09-20 20:38 ` [RFC PATCH v2 08/18] iommu/riscv: Use MSI table to enable IMSIC access Andrew Jones
2025-09-22 18:43 ` Jason Gunthorpe
2025-09-22 21:20 ` Andrew Jones
2025-09-22 23:56 ` Jason Gunthorpe
2025-09-23 10:12 ` Thomas Gleixner
2025-09-23 14:06 ` Jason Gunthorpe
2025-09-23 15:12 ` Andrew Jones
2025-09-23 15:27 ` Jason Gunthorpe
2025-09-23 15:50 ` Andrew Jones
2025-09-23 16:23 ` Jason Gunthorpe
2025-09-23 16:33 ` Andrew Jones
2026-03-24 9:12 ` Vincent Chen
2026-03-26 17:31 ` Andrew Jones
2025-09-23 14:37 ` Andrew Jones
2025-09-23 14:52 ` Jason Gunthorpe
2025-09-23 15:37 ` Andrew Jones
2025-10-23 13:47 ` Jinvas
2025-09-20 20:38 ` [RFC PATCH v2 09/18] iommu/dma: enable IOMMU_DMA for RISC-V Andrew Jones
2025-10-05 8:40 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 10/18] RISC-V: Define irqbypass vcpu_info Andrew Jones
2025-10-05 8:41 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 11/18] iommu/riscv: Maintain each irq msitbl index with chip data Andrew Jones
2025-09-20 20:39 ` Andrew Jones [this message]
2025-09-20 20:39 ` [RFC PATCH v2 13/18] iommu/riscv: report iommu capabilities Andrew Jones
2025-10-05 8:43 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 14/18] RISC-V: KVM: Enable KVM_VFIO interfaces on RISC-V arch Andrew Jones
2025-10-05 8:44 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 15/18] RISC-V: KVM: Add guest file irqbypass support Andrew Jones
2025-09-20 20:39 ` [RFC PATCH v2 16/18] vfio: enable IOMMU_TYPE1 for RISC-V Andrew Jones
2025-10-05 8:44 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 17/18] RISC-V: defconfig: Add VFIO modules Andrew Jones
2025-10-05 8:47 ` Nutty.Liu
2025-09-20 20:39 ` [RFC PATCH v2 18/18] DO NOT UPSTREAM: RISC-V: KVM: Workaround kvm_riscv_gstage_ioremap() bug Andrew Jones
2025-10-20 13:12 ` fangyu.yu
2025-10-20 19:47 ` Daniel Henrique Barboza
2025-10-21 1:10 ` fangyu.yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250920203851.2205115-32-ajones@ventanamicro.com \
--to=ajones@ventanamicro.com \
--cc=alex.williamson@redhat.com \
--cc=alex@ghiti.fr \
--cc=anup@brainfault.org \
--cc=atish.patra@linux.dev \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=joro@8bytes.org \
--cc=kvm-riscv@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=robin.murphy@arm.com \
--cc=tglx@linutronix.de \
--cc=tjeznach@rivosinc.com \
--cc=will@kernel.org \
--cc=zong.li@sifive.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox