qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com,
	eric.auger@redhat.com, peterx@redhat.com, jasowang@redhat.com,
	mst@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
	joao.m.martins@oracle.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, yi.y.sun@intel.com, chao.p.peng@intel.com,
	Yi Sun <yi.y.sun@linux.intel.com>,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Eduardo Habkost <eduardo@habkost.net>
Subject: [PATCH rfcv1 19/23] intel_iommu: introduce pasid iotlb cache
Date: Mon, 15 Jan 2024 18:37:31 +0800	[thread overview]
Message-ID: <20240115103735.132209-20-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20240115103735.132209-1-zhenzhong.duan@intel.com>

From: Yi Liu <yi.l.liu@intel.com>

To accelerate stage-1 translation, introduce a PASID-based IOTLB cache.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |   1 +
 include/hw/i386/intel_iommu.h  |   1 +
 hw/i386/intel_iommu.c          | 126 +++++++++++++++++++++++++++++++--
 hw/i386/trace-events           |   1 +
 4 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 21fa767740..08701f5457 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -480,6 +480,7 @@ typedef union VTDInvDesc VTDInvDesc;
 
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
+    bool is_piotlb;
     uint16_t domain_id;
     uint32_t pasid;
     uint64_t addr;
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index dedaab5ac9..f3e75263b7 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -348,6 +348,7 @@ struct IntelIOMMUState {
 
     uint32_t context_cache_gen;     /* Should be in [1,MAX] */
     GHashTable *iotlb;              /* IOTLB */
+    GHashTable *p_iotlb;            /* pasid based IOTLB */
 
     GHashTable *vtd_address_spaces;             /* VTD address spaces */
     VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1e87383a41..e9480608a5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -82,6 +82,8 @@ static VTDPASIDAddressSpace *vtd_add_find_pasid_as(IntelIOMMUState *s,
                                                    PCIBus *bus,
                                                    int devfn,
                                                    uint32_t pasid);
+static int vtd_dev_get_rid2pasid(IntelIOMMUState *s, uint8_t bus_num,
+                                 uint8_t devfn, uint32_t *rid_pasid);
 
 static void vtd_panic_require_caching_mode(void)
 {
@@ -297,6 +299,7 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
     uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
     uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
     return (entry->domain_id == info->domain_id) &&
+            (info->is_piotlb ? (entry->pasid == info->pasid) : 1) &&
             (((entry->gfn & info->mask) == gfn) ||
              (entry->gfn == gfn_tlb));
 }
@@ -333,12 +336,19 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
     vtd_iommu_unlock(s);
 }
 
+static void vtd_reset_piotlb(IntelIOMMUState *s)
+{   /* Takes no lock itself; the visible callers hold the IOMMU lock. */
+    assert(s->p_iotlb);                  /* table must already exist */
+    g_hash_table_remove_all(s->p_iotlb); /* drop every cached entry */
+}
+
 static void vtd_reset_caches(IntelIOMMUState *s)
 {
     vtd_iommu_lock(s);
     vtd_reset_iotlb_locked(s);
     vtd_reset_context_cache_locked(s);
     vtd_pasid_cache_reset(s);
+    vtd_reset_piotlb(s);
     vtd_iommu_unlock(s);
 }
 
@@ -2026,6 +2036,63 @@ static void vtd_report_fault(IntelIOMMUState *s,
     }
 }
 
+static uint64_t vtd_get_piotlb_gfn(hwaddr addr, uint32_t level)
+{   /* Mask addr with the level's page mask, then shift to 4K units. */
+    return (addr & vtd_flpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
+}
+
+static int vtd_get_piotlb_key(char *key, int key_size, uint64_t gfn,
+                              uint32_t pasid, uint32_t level,
+                              uint16_t source_id)
+{   /* Fields are unsigned; signed specifiers could emit '-' and truncate. */
+    return snprintf(key, key_size,
+                    "rsv%010dsid%06dpasid%010ugfn%017llulevel%01u",
+                    0, source_id, pasid, (unsigned long long)gfn, level);
+}
+
+/*
+ * Probe the PASID IOTLB for @addr at every level from VTD_SL_PT_LEVEL up to
+ * (not including) VTD_SL_PML4_LEVEL; return the first hit, or NULL.
+ */
+static VTDIOTLBEntry *vtd_lookup_piotlb(IntelIOMMUState *s, uint32_t pasid,
+                                        hwaddr addr, uint16_t source_id)
+{
+    VTDIOTLBEntry *hit = NULL;
+    char key[64];
+    int lvl;
+
+    for (lvl = VTD_SL_PT_LEVEL; lvl < VTD_SL_PML4_LEVEL && !hit; lvl++) {
+        vtd_get_piotlb_key(key, sizeof(key), vtd_get_piotlb_gfn(addr, lvl),
+                           pasid, lvl, source_id);
+        hit = g_hash_table_lookup(s->p_iotlb, key);
+    }
+
+    return hit;
+}
+
+/* Cache one translation result; the table is emptied first once it is full. */
+static void vtd_update_piotlb(IntelIOMMUState *s, uint32_t pasid,
+                              uint16_t domain_id, hwaddr addr, uint64_t flpte,
+                              uint8_t access_flags, uint32_t level,
+                              uint16_t source_id)
+{
+    VTDIOTLBEntry *new_entry = g_malloc(sizeof(*new_entry));
+    char *new_key = g_malloc(64);
+    uint64_t frame = vtd_get_piotlb_gfn(addr, level);
+
+    if (g_hash_table_size(s->p_iotlb) >= VTD_PASID_IOTLB_MAX_SIZE) {
+        vtd_reset_piotlb(s);
+    }
+    new_entry->gfn = frame;
+    new_entry->mask = vtd_flpt_level_page_mask(level);
+    new_entry->domain_id = domain_id;
+    new_entry->pasid = pasid;
+    new_entry->pte = flpte;
+    new_entry->access_flags = access_flags;
+    vtd_get_piotlb_key(new_key, 64, frame, pasid, level, source_id);
+    g_hash_table_replace(s->p_iotlb, new_key, new_entry);
+}
+
 /*
  * Map dev to pasid-entry then do a paging-structures walk to do a iommu
  * translation.
@@ -2056,6 +2123,8 @@ static bool vtd_do_iommu_fl_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     bool reads = true;
     bool writes = true;
     uint8_t access_flags;
+    uint32_t pasid;
+    VTDIOTLBEntry *piotlb_entry;
 
     /*
      * We have standalone memory region for interrupt addresses, we
@@ -2074,8 +2143,30 @@ static bool vtd_do_iommu_fl_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         return false;
     }
 
+    /* For emulated device IOVA translation, use RID2PASID. */
+    if (vtd_dev_get_rid2pasid(s, pci_bus_num(bus), devfn, &pasid)) {
+        error_report_once("%s: detected translation failure 2 "
+                          "(dev=%02x:%02x:%02x, iova=0x%" PRIx64 ")",
+                          __func__, pci_bus_num(bus),
+                          VTD_PCI_SLOT(devfn),
+                          VTD_PCI_FUNC(devfn),
+                          addr);
+        return false;
+    }
+
     vtd_iommu_lock(s);
 
+    /* Try to fetch flpte from the PASID IOTLB */
+    piotlb_entry = vtd_lookup_piotlb(s, pasid, addr, source_id);
+    if (piotlb_entry) {
+        trace_vtd_piotlb_page_hit(source_id, pasid, addr, piotlb_entry->pte,
+                                  piotlb_entry->domain_id);
+        flpte = piotlb_entry->pte;
+        access_flags = piotlb_entry->access_flags;
+        page_mask = piotlb_entry->mask;
+        goto out;
+    }
+
     ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe, PCI_NO_PASID);
     is_fpd_set = pe.val[0] & VTD_PASID_ENTRY_FPD;
     if (ret) {
@@ -2108,6 +2199,9 @@ static bool vtd_do_iommu_fl_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     page_mask = vtd_flpt_level_page_mask(level);
     access_flags = IOMMU_ACCESS_FLAG(reads, writes);
 
+    vtd_update_piotlb(s, pasid, vtd_pe_get_domain_id(&pe), addr, flpte,
+                      access_flags, level, source_id);
+out:
     vtd_iommu_unlock(s);
 
     entry->iova = addr & page_mask;
@@ -3080,6 +3174,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
     trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
 
     assert(am <= VTD_MAMV);
+    info.is_piotlb = false;
     info.domain_id = domain_id;
     info.addr = addr;
     info.mask = ~((1 << am) - 1);
@@ -4063,12 +4158,16 @@ static void vtd_flush_pasid_iotlb(gpointer key, gpointer value,
         vtd_invalidate_piotlb(vtd_pasid_as,
                               piotlb_info->inv_data);
     }
+}
 
-    /*
-     * TODO: needs to add QEMU piotlb flush when QEMU piotlb
-     * infrastructure is ready. For now, it is enough for passthru
-     * devices.
-     */
+/* GHRFunc: select entries whose domain ID and PASID both match @user_data. */
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+                                         gpointer user_data)
+{
+    const VTDIOTLBPageInvInfo *inv = user_data;
+    const VTDIOTLBEntry *cached = value;
+
+    return cached->domain_id == inv->domain_id && cached->pasid == inv->pasid;
 }
 
 static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
@@ -4076,6 +4175,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
 {
     struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
     VTDPIOTLBInvInfo piotlb_info;
+    VTDIOTLBPageInvInfo info;
 
     cache_info.addr = 0;
     cache_info.npages = (uint64_t)-1;
@@ -4084,6 +4184,9 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
     piotlb_info.pasid = pasid;
     piotlb_info.inv_data = &cache_info;
 
+    info.domain_id = domain_id;
+    info.pasid = pasid;
+
     vtd_iommu_lock(s);
     /*
      * Here loops all the vtd_pasid_as instances in s->vtd_pasid_as
@@ -4092,6 +4195,8 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
      */
     g_hash_table_foreach(s->vtd_pasid_as,
                          vtd_flush_pasid_iotlb, &piotlb_info);
+    g_hash_table_foreach_remove(s->p_iotlb, vtd_hash_remove_by_pasid,
+                                &info);
     vtd_iommu_unlock(s);
 }
 
@@ -4101,6 +4206,7 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
 {
     struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
     VTDPIOTLBInvInfo piotlb_info;
+    VTDIOTLBPageInvInfo info;
 
     cache_info.addr = addr;
     cache_info.npages = 1 << am;
@@ -4110,6 +4216,12 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
     piotlb_info.pasid = pasid;
     piotlb_info.inv_data = &cache_info;
 
+    info.is_piotlb = true;
+    info.domain_id = domain_id;
+    info.pasid = pasid;
+    info.addr = addr;
+    info.mask = ~((1 << am) - 1);
+
     vtd_iommu_lock(s);
     /*
      * Here loops all the vtd_pasid_as instances in s->vtd_pasid_as
@@ -4118,6 +4230,8 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
      */
     g_hash_table_foreach(s->vtd_pasid_as,
                          vtd_flush_pasid_iotlb, &piotlb_info);
+    g_hash_table_foreach_remove(s->p_iotlb,
+                                vtd_hash_remove_by_page, &info);
     vtd_iommu_unlock(s);
 }
 
@@ -6034,6 +6148,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     /* No corresponding destroy */
     s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal,
                                      g_free, g_free);
+    s->p_iotlb = g_hash_table_new_full(&g_str_hash, &g_str_equal,
+                                       g_free, g_free);
     s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
                                       g_free, g_free);
     s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal,
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 00b27bc5b1..7c36f34ae8 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -31,6 +31,7 @@ vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID slective PC invalida
 vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev: 0x%"PRIx16
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
+vtd_piotlb_page_hit(uint16_t sid, uint32_t pasid, uint64_t addr, uint64_t pte, uint16_t domain) "PIOTLB page hit sid 0x%"PRIx16" pasid %"PRIu32" iova 0x%"PRIx64" pte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
 vtd_iotlb_pe_hit(uint32_t pasid, uint64_t val0, uint32_t gen) "IOTLB pasid hit pasid %"PRIu32" val[0] 0x%"PRIx64" gen %"PRIu32
-- 
2.34.1



  parent reply	other threads:[~2024-01-15 10:43 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-15 10:37 [PATCH rfcv1 00/23] intel_iommu: Enable stage-1 translation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 01/23] Update linux header to support nested hwpt alloc Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 02/23] backends/iommufd: add helpers for allocating user-managed HWPT Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 03/23] backends/iommufd_device: introduce IOMMUFDDevice targeted interface Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 04/23] vfio: implement IOMMUFDDevice interface callbacks Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 05/23] intel_iommu: add a placeholder variable for scalable modern mode Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 06/23] intel_iommu: check and sync host IOMMU cap/ecap in " Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 07/23] intel_iommu: process PASID cache invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 08/23] intel_iommu: add PASID cache management infrastructure Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 09/23] vfio/iommufd_device: Add ioas_id in IOMMUFDDevice and pass to vIOMMU Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 10/23] intel_iommu: bind/unbind guest page table to host Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 11/23] intel_iommu: ERRATA_772415 workaround Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 12/23] intel_iommu: replay pasid binds after context cache invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 13/23] intel_iommu: process PASID-based iotlb invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 14/23] intel_iommu: propagate PASID-based iotlb invalidation to host Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 15/23] intel_iommu: process PASID-based Device-TLB invalidation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 16/23] intel_iommu: rename slpte in iotlb_entry to pte Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 17/23] intel_iommu: implement firt level translation Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 18/23] intel_iommu: fix the fault reason report Zhenzhong Duan
2024-01-15 10:37 ` Zhenzhong Duan [this message]
2024-01-15 10:37 ` [PATCH rfcv1 20/23] intel_iommu: piotlb invalidation should notify unmap Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 21/23] intel_iommu: invalidate piotlb when flush pasid Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 22/23] intel_iommu: refresh pasid bind after pasid cache force reset Zhenzhong Duan
2024-01-15 10:37 ` [PATCH rfcv1 23/23] intel_iommu: modify x-scalable-mode to be string option Zhenzhong Duan
     [not found]   ` <CGME20240131144013eucas1p22d46339ae42f54dd59c23e8b95502dda@eucas1p2.samsung.com>
2024-01-31 14:40     ` Joel Granados
2024-01-31 15:24       ` Yi Liu
2024-02-04 21:05         ` Joel Granados
2024-01-22  4:29 ` [PATCH rfcv1 00/23] intel_iommu: Enable stage-1 translation Jason Wang
2024-01-22  5:59   ` Duan, Zhenzhong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240115103735.132209-20-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clg@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).