qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Sairaj Kodilkar <sarunkod@amd.com>
To: <qemu-devel@nongnu.org>, <kvm@vger.kernel.org>,
	<alejandro.j.jimenez@oracle.com>, <vasant.hegde@amd.com>,
	<suravee.suthikulpanit@amd.com>
Cc: <mst@redhat.com>, <imammedo@redhat.com>, <anisinha@redhat.com>,
	<marcel.apfelbaum@gmail.com>, <pbonzini@redhat.com>,
	<richard.henderson@linaro.org>, <eduardo@habkost.net>,
	<yi.l.liu@intel.com>, <eric.auger@redhat.com>,
	<zhenzhong.duan@intel.com>, <cohuck@redhat.com>,
	<seanjc@google.com>, <iommu@lists.linux.dev>,
	<kevin.tian@intel.com>, <joro@8bytes.org>,
	Sairaj Kodilkar <sarunkod@amd.com>
Subject: [RFC PATCH RESEND 5/5] amd_iommu: Add support for upto 2048 interrupts per IRT
Date: Tue, 18 Nov 2025 15:45:32 +0530	[thread overview]
Message-ID: <20251118101532.4315-6-sarunkod@amd.com> (raw)
In-Reply-To: <20251118101532.4315-1-sarunkod@amd.com>

AMD IOMMU supports up to 2048 MSIs for a single device function
when the NUM_INT_REMAP_SUP bit in Extended Feature Register 2 is set.
Software enables this feature by writing one to NUM_INT_REMAP_MODE
in the control register. The MSI address destination mode (DM) bit decides
how many MSI data bits the IOMMU uses to index into the IRT. When DM = 0,
the IOMMU uses bits 8:0 (max 512) for the index; otherwise (DM = 1) it
uses bits 10:0 (max 2048).

This feature can be enabled with the flag `numint2k=on`. For
passthrough devices, the vIOMMU reads the host control register exposed
through the vendor capabilities to determine whether the host IOMMU has
enabled 2048 MSIs. If the host IOMMU has not enabled it, the feature is
disabled for the guest.

example command line
'''
-object iommufd,id=fd0 \
-device amd_iommu,dma-remap=on,numint2k=on \
-device vfio-pci,host=<DEVID>,iommufd=fd0 \
'''

NOTE: With a legacy VFIO container (no IOMMUFD), the guest always falls
back to 512 MSIs.

Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
 hw/i386/amd_iommu.c | 74 ++++++++++++++++++++++++++++++++++++++++-----
 hw/i386/amd_iommu.h | 12 ++++++++
 2 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 3221bf5a0303..4f62c4ee3671 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -116,7 +116,12 @@ uint64_t amdvi_extended_feature_register(AMDVIState *s)
 
 uint64_t amdvi_extended_feature_register2(AMDVIState *s)
 {
-    return AMDVI_DEFAULT_EXT_FEATURES2;
+    uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES2;
+    if (s->num_int_sup_2k) {
+        feature |= AMDVI_FEATURE_NUM_INT_REMAP_SUP;
+    }
+
+    return feature;
 }
 
 /* configure MMIO registers at startup/reset */
@@ -1538,6 +1543,9 @@ static void amdvi_handle_control_write(AMDVIState *s)
                         AMDVI_MMIO_CONTROL_CMDBUFLEN);
     s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
 
+    s->num_int_enabled = (control >> AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT) &
+                         AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK;
+
     /* update the flags depending on the control register */
     if (s->cmdbuf_enabled) {
         amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
@@ -2119,6 +2127,25 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
      * (page 5)
      */
     delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
+    /*
+     * The MSI address register bit[2] is used to get the destination
+     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
+     * and when IOMMU supports upto 2048 interrupts.
+     */
+    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
+
+    if (dest_mode &&
+        iommu->num_int_enabled == AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K) {
+
+        trace_amdvi_ir_delivery_mode("2K interrupt mode");
+        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
+        if (ret < 0) {
+            goto remap_fail;
+        }
+        /* Translate IRQ to MSI messages */
+        x86_iommu_irq_to_msi_message(&irq, translated);
+        goto out;
+    }
 
     switch (delivery_mode) {
     case AMDVI_IOAPIC_INT_TYPE_FIXED:
@@ -2159,12 +2186,6 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
         goto remap_fail;
     }
 
-    /*
-     * The MSI address register bit[2] is used to get the destination
-     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
-     * only.
-     */
-    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
     if (dest_mode) {
         trace_amdvi_ir_err("invalid dest_mode");
         ret = -AMDVI_IR_ERR;
@@ -2322,6 +2343,30 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &iommu_as[devfn]->as;
 }
 
+/* Clear num_int_sup_2k unless hwinfo shows the host enabled 2K MSI mode. */
+static void amdvi_refresh_efrs_hwinfo(struct AMDVIState *s,
+                                      struct iommu_hw_info_amd *hwinfo)
+{
+    /* NULL hwinfo: non-IOMMUFD passthrough, host state cannot be queried */
+    if (s->num_int_sup_2k && !hwinfo) {
+        warn_report("AMDVI: Disabling 2048 MSI for guest, "
+                    "use IOMMUFD for device passthrough to support it");
+        s->num_int_sup_2k = false;
+    }
+
+    /* Flag still set implies hwinfo != NULL, so the dereference is safe */
+    if (s->num_int_sup_2k) {
+        uint64_t host_mode = hwinfo->control_register >>
+                             AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT;
+        host_mode &= AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK;
+        if (host_mode != AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K) {
+            warn_report("AMDVI: Disabling 2048 MSIs for guest, "
+                        "as host kernel does not support this feature");
+            s->num_int_sup_2k = false;
+        }
+    }
+    amdvi_refresh_efrs(s);
+}
 
 static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
                                    HostIOMMUDevice *hiod, Error **errp)
@@ -2354,6 +2399,20 @@ static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
     object_ref(hiod);
     g_hash_table_insert(s->hiod_hash, new_key, hiod);
 
+    if (hiod->caps.type == IOMMU_HW_INFO_TYPE_AMD) {
+        /*
+         * Refresh the MMIO efr registers so that changes are visible to the
+         * guest.
+         */
+        amdvi_refresh_efrs_hwinfo(s, &hiod->caps.vendor_caps.amd);
+    } else {
+        /*
+         * Pass NULL hardware registers when we have non-IOMMUFD
+         * passthrough device
+         */
+        amdvi_refresh_efrs_hwinfo(s, NULL);
+    }
+
     return true;
 }
 
@@ -2641,6 +2700,7 @@ static const Property amdvi_properties[] = {
     DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
     DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
     DEFINE_PROP_BOOL("dma-remap", AMDVIState, dma_remap, false),
+    DEFINE_PROP_BOOL("numint2k", AMDVIState, num_int_sup_2k, false),
 };
 
 static const VMStateDescription vmstate_amdvi_sysbus = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index c8eaf229b50e..588725fe0c25 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -107,6 +107,9 @@
 #define AMDVI_MMIO_CONTROL_COMWAITINTEN   (1ULL << 4)
 #define AMDVI_MMIO_CONTROL_CMDBUFLEN      (1ULL << 12)
 #define AMDVI_MMIO_CONTROL_GAEN           (1ULL << 17)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK        (0x3)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT       (43)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K          (0x1)
 
 /* MMIO status register bits */
 #define AMDVI_MMIO_STATUS_CMDBUF_RUN  (1 << 4)
@@ -160,6 +163,7 @@
 #define AMDVI_PERM_READ             (1 << 0)
 #define AMDVI_PERM_WRITE            (1 << 1)
 
+/* EFR */
 #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
 #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
 #define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC Support      */
@@ -169,6 +173,9 @@
 #define AMDVI_FEATURE_HE                  (1ULL << 8) /* hardware error regs */
 #define AMDVI_FEATURE_PC                  (1ULL << 9) /* Perf counters       */
 
+/* EFR2 */
+#define AMDVI_FEATURE_NUM_INT_REMAP_SUP   (1ULL << 8) /* 2K int support      */
+
 /* reserved DTE bits */
 #define AMDVI_DTE_QUAD0_RESERVED        (GENMASK64(6, 2) | GENMASK64(63, 63))
 #define AMDVI_DTE_QUAD1_RESERVED        0
@@ -380,6 +387,8 @@ struct AMDVIState {
     bool evtlog_enabled;         /* event log enabled            */
     bool excl_enabled;
 
+    uint8_t num_int_enabled;
+
     hwaddr devtab;               /* base address device table    */
     uint64_t devtab_len;         /* device table length          */
 
@@ -433,6 +442,9 @@ struct AMDVIState {
 
     /* DMA address translation */
     bool dma_remap;
+
+    /* upto 2048 interrupt support */
+    bool num_int_sup_2k;
 };
 
 uint64_t amdvi_extended_feature_register(AMDVIState *s);
-- 
2.34.1



      parent reply	other threads:[~2025-11-18 10:18 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-18 10:15 [RFC PATCH RESEND 0/5] amd_iommu: support up to 2048 MSI vectors per IRT Sairaj Kodilkar
2025-11-18 10:15 ` [RFC PATCH RESEND 1/5] [DO NOT MERGE] linux-headers: Introduce struct iommu_hw_info_amd Sairaj Kodilkar
2025-11-18 10:15 ` [RFC PATCH RESEND 2/5] vfio/iommufd: Add amd specific hardware info struct to vendor capability Sairaj Kodilkar
2025-11-18 10:15 ` [RFC PATCH RESEND 3/5] amd-iommu: Add support for set/unset IOMMU for VFIO PCI devices Sairaj Kodilkar
2025-11-18 10:15 ` [RFC PATCH RESEND 4/5] amd_iommu: Add support for extended feature register 2 Sairaj Kodilkar
2025-11-18 10:15 ` Sairaj Kodilkar [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251118101532.4315-6-sarunkod@amd.com \
    --to=sarunkod@amd.com \
    --cc=alejandro.j.jimenez@oracle.com \
    --cc=anisinha@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=eric.auger@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=seanjc@google.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=vasant.hegde@amd.com \
    --cc=yi.l.liu@intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).