* [PATCH v3 1/4] iommu/vt-d: Factor out invalidation descriptor composition
2024-08-15 6:52 [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Tina Zhang
@ 2024-08-15 6:52 ` Tina Zhang
2024-08-15 6:52 ` [PATCH v3 2/4] iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching Tina Zhang
` (3 subsequent siblings)
4 siblings, 0 replies; 13+ messages in thread
From: Tina Zhang @ 2024-08-15 6:52 UTC (permalink / raw)
To: Lu Baolu, Kevin Tian; +Cc: iommu, linux-kernel, Tina Zhang
Separate the logic for constructing IOTLB and device TLB invalidation
descriptors from the qi_flush interfaces. New helpers, qi_desc_*(), are
introduced to encapsulate this common functionality.
Moving descriptor composition code to new helpers enables its reuse in
the upcoming qi_batch interfaces.
No functional changes are intended.
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
drivers/iommu/intel/dmar.c | 93 ++----------------------------
drivers/iommu/intel/iommu.h | 109 ++++++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+), 87 deletions(-)
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 01e157d89a163..eaf862e8dea1a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1526,24 +1526,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type)
{
- u8 dw = 0, dr = 0;
-
struct qi_desc desc;
- int ih = 0;
-
- if (cap_write_drain(iommu->cap))
- dw = 1;
-
- if (cap_read_drain(iommu->cap))
- dr = 1;
-
- desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
- | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
- | QI_IOTLB_AM(size_order);
- desc.qw2 = 0;
- desc.qw3 = 0;
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1561,20 +1546,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- if (mask) {
- addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
- } else
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
-
- if (qdep >= QI_DEV_IOTLB_MAX_INVS)
- qdep = 0;
-
- desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
- QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1594,28 +1566,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
return;
}
- if (npages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(npages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
-
- if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
- addr = ALIGN_DOWN(addr, align);
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(addr) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
-
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1623,7 +1574,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
- unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
/*
@@ -1635,40 +1585,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
- QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
- QI_DEV_IOTLB_PFSID(pfsid);
-
- /*
- * If S bit is 0, we only flush a single page. If S bit is set,
- * The least significant zero bit indicates the invalidation address
- * range. VT-d spec 6.5.2.6.
- * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
- * size order = 0 is PAGE_SIZE 4KB
- * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
- * ECAP.
- */
- if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
- pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
- addr, size_order);
-
- /* Take page address */
- desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
-
- if (size_order) {
- /*
- * Existing 0s in address below size_order may be the least
- * significant bit, we must set them to 1s to avoid having
- * smaller size than desired.
- */
- desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
- VTD_PAGE_SHIFT);
- /* Clear size_order bit to indicate size */
- desc.qw1 &= ~mask;
- /* Set the S bit to indicate flushing more than 1 page */
- desc.qw1 |= QI_DEV_EIOTLB_SIZE;
- }
-
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
+ qdep, addr, size_order,
+ &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 01002ae2a091a..e297a322ba2d9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1066,6 +1066,115 @@ static inline unsigned long nrpages_to_size(unsigned long npages)
return npages << VTD_PAGE_SHIFT;
}
+static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_desc *desc)
+{
+ u8 dw = 0, dr = 0;
+ int ih = 0;
+
+ if (cap_write_drain(iommu->cap))
+ dw = 1;
+
+ if (cap_read_drain(iommu->cap))
+ dr = 1;
+
+ desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+ | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
+ desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+ | QI_IOTLB_AM(size_order);
+ desc->qw2 = 0;
+ desc->qw3 = 0;
+}
+
+static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr,
+ unsigned int mask, struct qi_desc *desc)
+{
+ if (mask) {
+ addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
+ desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+ } else {
+ desc->qw1 = QI_DEV_IOTLB_ADDR(addr);
+ }
+
+ if (qdep >= QI_DEV_IOTLB_MAX_INVS)
+ qdep = 0;
+
+ desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+ QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
+ desc->qw2 = 0;
+ desc->qw3 = 0;
+}
+
+static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih,
+ struct qi_desc *desc)
+{
+ if (npages == -1) {
+ desc->qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc->qw1 = 0;
+ } else {
+ int mask = ilog2(__roundup_pow_of_two(npages));
+ unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
+
+ if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
+ addr = ALIGN_DOWN(addr, align);
+
+ desc->qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc->qw1 = QI_EIOTLB_ADDR(addr) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
+ }
+}
+
+static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid,
+ u16 qdep, u64 addr,
+ unsigned int size_order,
+ struct qi_desc *desc)
+{
+ unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
+
+ desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(pfsid);
+
+ /*
+ * If S bit is 0, we only flush a single page. If S bit is set,
+ * The least significant zero bit indicates the invalidation address
+ * range. VT-d spec 6.5.2.6.
+ * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
+ * size order = 0 is PAGE_SIZE 4KB
+ * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
+ * ECAP.
+ */
+ if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
+ pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
+ addr, size_order);
+
+ /* Take page address */
+ desc->qw1 = QI_DEV_EIOTLB_ADDR(addr);
+
+ if (size_order) {
+ /*
+ * Existing 0s in address below size_order may be the least
+ * significant bit, we must set them to 1s to avoid having
+ * smaller size than desired.
+ */
+ desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
+ VTD_PAGE_SHIFT);
+ /* Clear size_order bit to indicate size */
+ desc->qw1 &= ~mask;
+ /* Set the S bit to indicate flushing more than 1 page */
+ desc->qw1 |= QI_DEV_EIOTLB_SIZE;
+ }
+}
+
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 2/4] iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching
2024-08-15 6:52 [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Tina Zhang
2024-08-15 6:52 ` [PATCH v3 1/4] iommu/vt-d: Factor out invalidation descriptor composition Tina Zhang
@ 2024-08-15 6:52 ` Tina Zhang
2024-08-15 6:52 ` [PATCH v3 3/4] iommu/vt-d: Add qi_batch for dmar_domain Tina Zhang
` (2 subsequent siblings)
4 siblings, 0 replies; 13+ messages in thread
From: Tina Zhang @ 2024-08-15 6:52 UTC (permalink / raw)
To: Lu Baolu, Kevin Tian; +Cc: iommu, linux-kernel, Tina Zhang
Extracts IOTLB and Dev-IOTLB invalidation logic from cache tag flush
interfaces into dedicated helper functions. It prepares the codebase
for upcoming changes to support batched cache invalidations.
To enable direct use of qi_flush helpers in the new functions,
iommu->flush.flush_iotlb and quirk_extra_dev_tlb_flush() are opened up.
No functional changes are intended.
Co-developed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
drivers/iommu/intel/cache.c | 142 ++++++++++++++++++++----------------
drivers/iommu/intel/iommu.c | 5 +-
drivers/iommu/intel/iommu.h | 3 +
3 files changed, 83 insertions(+), 67 deletions(-)
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 44e92638c0cd1..08f7ce2c16c3b 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -255,6 +255,78 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
+static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
+ unsigned long addr, unsigned long pages,
+ unsigned long mask, int ih)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ u64 type = DMA_TLB_PSI_FLUSH;
+
+ if (domain->use_first_level) {
+ qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih);
+ return;
+ }
+
+ /*
+ * Fallback to domain selective flush if no PSI support or the size
+ * is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap) || pages == -1) {
+ addr = 0;
+ mask = 0;
+ ih = 0;
+ type = DMA_TLB_DSI_FLUSH;
+ }
+
+ if (ecap_qis(iommu->ecap))
+ qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+ else
+ __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+}
+
+static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
+ unsigned long addr, unsigned long mask)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (tag->pasid == IOMMU_NO_PASID) {
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ addr, mask);
+ if (info->dtlb_extra_inval)
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid,
+ info->ats_qdep, addr, mask);
+ return;
+ }
+
+ qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask);
+ if (info->dtlb_extra_inval)
+ qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask);
+}
+
+static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH);
+ if (info->dtlb_extra_inval)
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH);
+}
+
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
@@ -270,30 +342,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
- struct device_domain_info *info;
- u16 sid;
-
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
- if (domain->use_first_level) {
- qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, addr, pages, ih);
- } else {
- /*
- * Fallback to domain selective flush if no
- * PSI support or the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) ||
- mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
- }
+ cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
@@ -307,18 +359,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
mask = MAX_AGAW_PFN_WIDTH;
fallthrough;
case CACHE_TAG_DEVTLB:
- info = dev_iommu_priv_get(tag->dev);
- sid = PCI_DEVID(info->bus, info->devfn);
-
- if (tag->pasid == IOMMU_NO_PASID)
- qi_flush_dev_iotlb(iommu, sid, info->pfsid,
- info->ats_qdep, addr, mask);
- else
- qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
- tag->pasid, info->ats_qdep,
- addr, mask);
-
- quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
break;
}
@@ -338,29 +379,14 @@ void cache_tag_flush_all(struct dmar_domain *domain)
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
- struct device_domain_info *info;
- u16 sid;
-
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
- if (domain->use_first_level)
- qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, 0, -1, 0);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
+ cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
- info = dev_iommu_priv_get(tag->dev);
- sid = PCI_DEVID(info->bus, info->devfn);
-
- qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- 0, MAX_AGAW_PFN_WIDTH);
- quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
- IOMMU_NO_PASID, info->ats_qdep);
+ cache_tag_flush_devtlb_all(domain, tag);
break;
}
@@ -399,20 +425,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
}
if (tag->type == CACHE_TAG_IOTLB ||
- tag->type == CACHE_TAG_NESTING_IOTLB) {
- /*
- * Fallback to domain selective flush if no
- * PSI support or the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) ||
- mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- addr, mask,
- DMA_TLB_PSI_FLUSH);
- }
+ tag->type == CACHE_TAG_NESTING_IOTLB)
+ cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 159da629349c4..14020414af892 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1184,9 +1184,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-/* return value determine if we need a write buffer flush */
-static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type)
+void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type)
{
int tlb_offset = ecap_iotlb_offset(iommu->ecap);
u64 val = 0, val_iva = 0;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e297a322ba2d9..74634805abd19 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1206,6 +1206,9 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options);
+
+void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
/*
* Options used in qi_submit_sync:
* QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 3/4] iommu/vt-d: Add qi_batch for dmar_domain
2024-08-15 6:52 [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Tina Zhang
2024-08-15 6:52 ` [PATCH v3 1/4] iommu/vt-d: Factor out invalidation descriptor composition Tina Zhang
2024-08-15 6:52 ` [PATCH v3 2/4] iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching Tina Zhang
@ 2024-08-15 6:52 ` Tina Zhang
2024-08-15 6:52 ` [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation Tina Zhang
2024-09-02 2:32 ` [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Baolu Lu
4 siblings, 0 replies; 13+ messages in thread
From: Tina Zhang @ 2024-08-15 6:52 UTC (permalink / raw)
To: Lu Baolu, Kevin Tian; +Cc: iommu, linux-kernel, Tina Zhang
From: Lu Baolu <baolu.lu@linux.intel.com>
Introduces a qi_batch structure to hold batched cache invalidation
descriptors on a per-dmar_domain basis. A fixed-size descriptor
array is used for simplicity. The qi_batch is allocated when the
first cache tag is added to the domain and freed during
iommu_free_domain().
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
drivers/iommu/intel/cache.c | 7 +++++++
drivers/iommu/intel/iommu.c | 1 +
drivers/iommu/intel/iommu.h | 14 ++++++++++++++
drivers/iommu/intel/nested.c | 1 +
drivers/iommu/intel/svm.c | 5 ++++-
5 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 08f7ce2c16c3b..21485c86e7381 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain,
u16 did = domain_get_id_for_dev(domain, dev);
int ret;
+ /* domain->qi_batch will be freed in iommu_free_domain() path. */
+ if (!domain->qi_batch) {
+ domain->qi_batch = kzalloc(sizeof(struct qi_batch), GFP_KERNEL);
+ if (!domain->qi_batch)
+ return -ENOMEM;
+ }
+
ret = __cache_tag_assign_domain(domain, did, dev, pasid);
if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
return ret;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 14020414af892..cde5ed4d0fede 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1586,6 +1586,7 @@ static void domain_exit(struct dmar_domain *domain)
if (WARN_ON(!list_empty(&domain->devices)))
return;
+ kfree(domain->qi_batch);
kfree(domain);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 74634805abd19..d21eca94cb8f9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -584,6 +584,19 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};
+/*
+ * We start simply by using a fixed size for the batched descriptors. This
+ * size is currently sufficient for our needs. Future improvements could
+ * involve dynamically allocating the batch buffer based on actual demand,
+ * allowing us to adjust the batch size for optimal performance in different
+ * scenarios.
+ */
+#define QI_MAX_BATCHED_DESC_COUNT 16
+struct qi_batch {
+ struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
+ unsigned int index;
+};
+
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
@@ -608,6 +621,7 @@ struct dmar_domain {
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
+ struct qi_batch *qi_batch; /* Batched QI descriptors */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 36a91b1b52be3..433c58944401f 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -83,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
spin_lock(&s2_domain->s1_lock);
list_del(&dmar_domain->s2_link);
spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain->qi_batch);
kfree(dmar_domain);
}
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 0e3a9b38bef21..3421813995db8 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
- kfree(container_of(mn, struct dmar_domain, notifier));
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+
+ kfree(domain->qi_batch);
+ kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-15 6:52 [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Tina Zhang
` (2 preceding siblings ...)
2024-08-15 6:52 ` [PATCH v3 3/4] iommu/vt-d: Add qi_batch for dmar_domain Tina Zhang
@ 2024-08-15 6:52 ` Tina Zhang
2024-08-16 16:38 ` Jacob Pan
2024-09-02 2:32 ` [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Baolu Lu
4 siblings, 1 reply; 13+ messages in thread
From: Tina Zhang @ 2024-08-15 6:52 UTC (permalink / raw)
To: Lu Baolu, Kevin Tian; +Cc: iommu, linux-kernel, Tina Zhang
Converts IOTLB and Dev-IOTLB invalidation to a batched model. Cache tag
invalidation requests for a domain are now accumulated in a qi_batch
structure before being flushed in bulk. It replaces the previous per-
request qi_flush approach with a more efficient batching mechanism.
Co-developed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
drivers/iommu/intel/cache.c | 122 +++++++++++++++++++++++++++++++-----
1 file changed, 107 insertions(+), 15 deletions(-)
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 21485c86e7381..983769de3bc90 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -262,6 +262,79 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
+static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+ if (!iommu || !batch->index)
+ return;
+
+ qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+ /* Reset the index value and clean the whole batch buffer. */
+ memset(batch, 0, sizeof(*batch));
+}
+
+static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+ if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+ qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_batch *batch)
+{
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned int mask,
+ struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any Device-TLB
+ * invalidation requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
+ u64 addr, unsigned long npages, bool ih,
+ struct qi_batch *batch)
+{
+ /*
+ * npages == -1 means a PASID-selective invalidation, otherwise,
+ * a positive value for Page-selective-within-PASID invalidation.
+ * 0 is not a valid input.
+ */
+ if (!npages)
+ return;
+
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order, struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any
+ * Device-TLB invalidation requests while address remapping hardware
+ * is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
+ &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
unsigned long addr, unsigned long pages,
unsigned long mask, int ih)
@@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
u64 type = DMA_TLB_PSI_FLUSH;
if (domain->use_first_level) {
- qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih);
+ qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
+ pages, ih, domain->qi_batch);
return;
}
@@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
}
if (ecap_qis(iommu->ecap))
- qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+ qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
+ domain->qi_batch);
else
__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}
@@ -303,19 +378,20 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_
sid = PCI_DEVID(info->bus, info->devfn);
if (tag->pasid == IOMMU_NO_PASID) {
- qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- addr, mask);
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ addr, mask, domain->qi_batch);
if (info->dtlb_extra_inval)
- qi_flush_dev_iotlb(iommu, sid, info->pfsid,
- info->ats_qdep, addr, mask);
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ addr, mask, domain->qi_batch);
return;
}
- qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
- info->ats_qdep, addr, mask);
+ qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask, domain->qi_batch);
if (info->dtlb_extra_inval)
- qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
- info->ats_qdep, addr, mask);
+ qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask,
+ domain->qi_batch);
}
static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
@@ -327,11 +403,11 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
- qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
- MAX_AGAW_PFN_WIDTH);
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH, domain->qi_batch);
if (info->dtlb_extra_inval)
- qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
- MAX_AGAW_PFN_WIDTH);
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}
/*
@@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih)
{
+ struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
@@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
@@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
@@ -381,11 +463,16 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
*/
void cache_tag_flush_all(struct dmar_domain *domain)
{
+ struct intel_iommu *iommu = NULL;
struct cache_tag *tag;
unsigned long flags;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
@@ -399,6 +486,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
trace_cache_tag_flush_all(tag);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
@@ -416,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end)
{
+ struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
@@ -424,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
iommu_flush_write_buffer(iommu);
@@ -437,5 +528,6 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-15 6:52 ` [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation Tina Zhang
@ 2024-08-16 16:38 ` Jacob Pan
2024-08-17 3:28 ` Baolu Lu
0 siblings, 1 reply; 13+ messages in thread
From: Jacob Pan @ 2024-08-16 16:38 UTC (permalink / raw)
To: Tina Zhang; +Cc: Lu Baolu, Kevin Tian, iommu, linux-kernel
On Thu, 15 Aug 2024 14:52:21 +0800
Tina Zhang <tina.zhang@intel.com> wrote:
> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
> dmar_domain *domain, struct cache_tag * u64 type = DMA_TLB_PSI_FLUSH;
>
> if (domain->use_first_level) {
> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
> addr, pages, ih);
> + qi_batch_add_piotlb(iommu, tag->domain_id,
> tag->pasid, addr,
> + pages, ih, domain->qi_batch);
> return;
> }
>
> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
> dmar_domain *domain, struct cache_tag * }
>
> if (ecap_qis(iommu->ecap))
> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
> mask, type);
> + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih,
> mask, type,
> + domain->qi_batch);
>
If I understand this correctly, IOTLB flush maybe deferred until the
batch array is full, right? If so, is there a security gap where
callers think the mapping is gone after the call returns?
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-16 16:38 ` Jacob Pan
@ 2024-08-17 3:28 ` Baolu Lu
2024-08-19 7:58 ` Yi Liu
2024-08-19 15:40 ` Jacob Pan
0 siblings, 2 replies; 13+ messages in thread
From: Baolu Lu @ 2024-08-17 3:28 UTC (permalink / raw)
To: Jacob Pan, Tina Zhang; +Cc: baolu.lu, Kevin Tian, iommu, linux-kernel
On 2024/8/17 0:38, Jacob Pan wrote:
> On Thu, 15 Aug 2024 14:52:21 +0800
> Tina Zhang <tina.zhang@intel.com> wrote:
>
>> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
>> dmar_domain *domain, struct cache_tag * u64 type = DMA_TLB_PSI_FLUSH;
>>
>> if (domain->use_first_level) {
>> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
>> addr, pages, ih);
>> + qi_batch_add_piotlb(iommu, tag->domain_id,
>> tag->pasid, addr,
>> + pages, ih, domain->qi_batch);
>> return;
>> }
>>
>> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
>> dmar_domain *domain, struct cache_tag * }
>>
>> if (ecap_qis(iommu->ecap))
>> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
>> mask, type);
>> + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih,
>> mask, type,
>> + domain->qi_batch);
>>
> If I understand this correctly, IOTLB flush maybe deferred until the
> batch array is full, right? If so, is there a security gap where
> callers think the mapping is gone after the call returns?
No. All related caches are flushed before function return. A domain can
have multiple cache tags. Previously, we sent individual cache
invalidation requests to hardware. This change combines all necessary
invalidation requests into a single batch and raises them to hardware
together to make it more efficient.
Thanks,
baolu
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-17 3:28 ` Baolu Lu
@ 2024-08-19 7:58 ` Yi Liu
2024-08-19 15:53 ` Jacob Pan
2024-08-19 15:40 ` Jacob Pan
1 sibling, 1 reply; 13+ messages in thread
From: Yi Liu @ 2024-08-19 7:58 UTC (permalink / raw)
To: Baolu Lu, Jacob Pan, Tina Zhang; +Cc: Kevin Tian, iommu, linux-kernel
On 2024/8/17 11:28, Baolu Lu wrote:
> On 2024/8/17 0:38, Jacob Pan wrote:
>> On Thu, 15 Aug 2024 14:52:21 +0800
>> Tina Zhang <tina.zhang@intel.com> wrote:
>>
>>> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
>>> dmar_domain *domain, struct cache_tag * u64 type = DMA_TLB_PSI_FLUSH;
>>> if (domain->use_first_level) {
>>> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
>>> addr, pages, ih);
>>> + qi_batch_add_piotlb(iommu, tag->domain_id,
>>> tag->pasid, addr,
>>> + pages, ih, domain->qi_batch);
>>> return;
>>> }
>>> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
>>> dmar_domain *domain, struct cache_tag * }
>>> if (ecap_qis(iommu->ecap))
>>> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
>>> mask, type);
>>> + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih,
>>> mask, type,
>>> + domain->qi_batch);
>> If I understand this correctly, IOTLB flush maybe deferred until the
>> batch array is full, right? If so, is there a security gap where
>> callers think the mapping is gone after the call returns?
> No. All related caches are flushed before function return. A domain can
> have multiple cache tags. Previously, we sent individual cache
> invalidation requests to hardware. This change combines all necessary
> invalidation requests into a single batch and raise them to hardware
> together to make it more efficient.
Hi Jacob,
Do you mean the configuration where iommu.strict==0? :) As per the above
explanation from Baolu, this patch is not for that, although it uses
the term "batched". Also, it would reduce the VMExits due to
IOTLB/DevTLB invalidations a lot in a virtualization environment.
--
Regards,
Yi Liu
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-19 7:58 ` Yi Liu
@ 2024-08-19 15:53 ` Jacob Pan
0 siblings, 0 replies; 13+ messages in thread
From: Jacob Pan @ 2024-08-19 15:53 UTC (permalink / raw)
To: Yi Liu; +Cc: Baolu Lu, Tina Zhang, Kevin Tian, iommu, linux-kernel
Hi Yi,
On Mon, 19 Aug 2024 15:58:35 +0800
Yi Liu <yi.l.liu@intel.com> wrote:
> On 2024/8/17 11:28, Baolu Lu wrote:
> > On 2024/8/17 0:38, Jacob Pan wrote:
> >> On Thu, 15 Aug 2024 14:52:21 +0800
> >> Tina Zhang <tina.zhang@intel.com> wrote:
> >>
> >>> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
> >>> dmar_domain *domain, struct cache_tag * u64 type =
> >>> DMA_TLB_PSI_FLUSH; if (domain->use_first_level) {
> >>> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
> >>> addr, pages, ih);
> >>> + qi_batch_add_piotlb(iommu, tag->domain_id,
> >>> tag->pasid, addr,
> >>> + pages, ih, domain->qi_batch);
> >>> return;
> >>> }
> >>> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
> >>> dmar_domain *domain, struct cache_tag * }
> >>> if (ecap_qis(iommu->ecap))
> >>> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
> >>> mask, type);
> >>> + qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih,
> >>> mask, type,
> >>> + domain->qi_batch);
> >> If I understand this correctly, IOTLB flush maybe deferred until
> >> the batch array is full, right? If so, is there a security gap
> >> where callers think the mapping is gone after the call returns?
> > No. All related caches are flushed before function return. A domain
> > can have multiple cache tags. Previously, we sent individual cache
> > invalidation requests to hardware. This change combines all
> > necessary invalidation requests into a single batch and raise them
> > to hardware together to make it more efficient.
>
> Hi Jacob,
>
> Do you mean the configuration that iommu.strict==0? :) As the above
> explanation from Baolu, this patch is not for that although it uses
> the term "batched". Also, it would reduce the VMExits that due to the
> IOTLB/DevTLB invalidation a lot in the virtualization environment.
>
No, I understand this is a different "batch", not for deferred flush.
I am asking why it has to gather QI_MAX_BATCHED_DESC_COUNT before it
does the flush. See my other reply.
Thanks,
Jacob
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-17 3:28 ` Baolu Lu
2024-08-19 7:58 ` Yi Liu
@ 2024-08-19 15:40 ` Jacob Pan
2024-08-20 2:06 ` Baolu Lu
1 sibling, 1 reply; 13+ messages in thread
From: Jacob Pan @ 2024-08-19 15:40 UTC (permalink / raw)
To: Baolu Lu; +Cc: Tina Zhang, Kevin Tian, iommu, linux-kernel
On Sat, 17 Aug 2024 11:28:21 +0800
Baolu Lu <baolu.lu@linux.intel.com> wrote:
> On 2024/8/17 0:38, Jacob Pan wrote:
> > On Thu, 15 Aug 2024 14:52:21 +0800
> > Tina Zhang <tina.zhang@intel.com> wrote:
> >
> >> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
> >> dmar_domain *domain, struct cache_tag * u64 type =
> >> DMA_TLB_PSI_FLUSH;
> >> if (domain->use_first_level) {
> >> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
> >> addr, pages, ih);
> >> + qi_batch_add_piotlb(iommu, tag->domain_id,
> >> tag->pasid, addr,
> >> + pages, ih, domain->qi_batch);
> >> return;
> >> }
> >>
> >> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
> >> dmar_domain *domain, struct cache_tag * }
> >>
> >> if (ecap_qis(iommu->ecap))
> >> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
> >> mask, type);
> >> + qi_batch_add_iotlb(iommu, tag->domain_id, addr |
> >> ih, mask, type,
> >> + domain->qi_batch);
> >>
> > If I understand this correctly, IOTLB flush maybe deferred until the
> > batch array is full, right? If so, is there a security gap where
> > callers think the mapping is gone after the call returns?
> No. All related caches are flushed before function return. A domain
> can have multiple cache tags. Previously, we sent individual cache
> invalidation requests to hardware. This change combines all necessary
> invalidation requests into a single batch and raise them to hardware
> together to make it more efficient.
I was looking at the code below, if the index does not reach
QI_MAX_BATCHED_DESC_COUNT. There will be no flush after
cache_tag_flush_iotlb() returns, right?
+static void qi_batch_increment_index(struct
intel_iommu *iommu, struct qi_batch *batch) +{
+ if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+ qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64
addr,
+ unsigned int size_order, u64 type,
+ struct qi_batch *batch)
+{
+ qi_desc_iotlb(iommu, did, addr, size_order, type,
&(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
> Thanks,
> baolu
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-19 15:40 ` Jacob Pan
@ 2024-08-20 2:06 ` Baolu Lu
2024-08-20 23:04 ` Jacob Pan
0 siblings, 1 reply; 13+ messages in thread
From: Baolu Lu @ 2024-08-20 2:06 UTC (permalink / raw)
To: Jacob Pan; +Cc: baolu.lu, Tina Zhang, Kevin Tian, iommu, linux-kernel
On 8/19/24 11:40 PM, Jacob Pan wrote:
> On Sat, 17 Aug 2024 11:28:21 +0800
> Baolu Lu<baolu.lu@linux.intel.com> wrote:
>
>> On 2024/8/17 0:38, Jacob Pan wrote:
>>> On Thu, 15 Aug 2024 14:52:21 +0800
>>> Tina Zhang<tina.zhang@intel.com> wrote:
>>>
>>>> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
>>>> dmar_domain *domain, struct cache_tag * u64 type =
>>>> DMA_TLB_PSI_FLUSH;
>>>> if (domain->use_first_level) {
>>>> - qi_flush_piotlb(iommu, tag->domain_id, tag->pasid,
>>>> addr, pages, ih);
>>>> + qi_batch_add_piotlb(iommu, tag->domain_id,
>>>> tag->pasid, addr,
>>>> + pages, ih, domain->qi_batch);
>>>> return;
>>>> }
>>>>
>>>> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
>>>> dmar_domain *domain, struct cache_tag * }
>>>>
>>>> if (ecap_qis(iommu->ecap))
>>>> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
>>>> mask, type);
>>>> + qi_batch_add_iotlb(iommu, tag->domain_id, addr |
>>>> ih, mask, type,
>>>> + domain->qi_batch);
>>>>
>>> If I understand this correctly, IOTLB flush maybe deferred until the
>>> batch array is full, right? If so, is there a security gap where
>>> callers think the mapping is gone after the call returns?
>> No. All related caches are flushed before function return. A domain
>> can have multiple cache tags. Previously, we sent individual cache
>> invalidation requests to hardware. This change combines all necessary
>> invalidation requests into a single batch and raise them to hardware
>> together to make it more efficient.
> I was looking at the code below, if the index does not reach
> QI_MAX_BATCHED_DESC_COUNT. There will be no flush after
> cache_tag_flush_iotlb() returns, right?
No. qi_batch_flush_descs() is called explicitly before return.
@@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct
dmar_domain *domain, struct cache_
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long
start,
unsigned long end, int ih)
{
+ struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
@@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain
*domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
@@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain
*domain, unsigned long start,
trace_cache_tag_flush_range(tag, start, end, addr,
pages, mask);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
Thanks,
baolu
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation
2024-08-20 2:06 ` Baolu Lu
@ 2024-08-20 23:04 ` Jacob Pan
0 siblings, 0 replies; 13+ messages in thread
From: Jacob Pan @ 2024-08-20 23:04 UTC (permalink / raw)
To: Baolu Lu; +Cc: Tina Zhang, Kevin Tian, iommu, linux-kernel
Hi Baolu,
On Tue, 20 Aug 2024 10:06:05 +0800
Baolu Lu <baolu.lu@linux.intel.com> wrote:
> On 8/19/24 11:40 PM, Jacob Pan wrote:
> > On Sat, 17 Aug 2024 11:28:21 +0800
> > Baolu Lu<baolu.lu@linux.intel.com> wrote:
> >
> >> On 2024/8/17 0:38, Jacob Pan wrote:
> >>> On Thu, 15 Aug 2024 14:52:21 +0800
> >>> Tina Zhang<tina.zhang@intel.com> wrote:
> >>>
> >>>> @@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct
> >>>> dmar_domain *domain, struct cache_tag * u64 type =
> >>>> DMA_TLB_PSI_FLUSH;
> >>>> if (domain->use_first_level) {
> >>>> - qi_flush_piotlb(iommu, tag->domain_id,
> >>>> tag->pasid, addr, pages, ih);
> >>>> + qi_batch_add_piotlb(iommu, tag->domain_id,
> >>>> tag->pasid, addr,
> >>>> + pages, ih,
> >>>> domain->qi_batch); return;
> >>>> }
> >>>>
> >>>> @@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct
> >>>> dmar_domain *domain, struct cache_tag * }
> >>>>
> >>>> if (ecap_qis(iommu->ecap))
> >>>> - qi_flush_iotlb(iommu, tag->domain_id, addr | ih,
> >>>> mask, type);
> >>>> + qi_batch_add_iotlb(iommu, tag->domain_id, addr |
> >>>> ih, mask, type,
> >>>> + domain->qi_batch);
> >>>>
> >>> If I understand this correctly, IOTLB flush maybe deferred until
> >>> the batch array is full, right? If so, is there a security gap
> >>> where callers think the mapping is gone after the call returns?
> >> No. All related caches are flushed before function return. A domain
> >> can have multiple cache tags. Previously, we sent individual cache
> >> invalidation requests to hardware. This change combines all
> >> necessary invalidation requests into a single batch and raise them
> >> to hardware together to make it more efficient.
> > I was looking at the code below, if the index does not reach
> > QI_MAX_BATCHED_DESC_COUNT. There will be no flush after
> > cache_tag_flush_iotlb() returns, right?
>
> No. qi_batch_flush_descs() is called explicitly before return.
I see, cache_tag_flush_iotlb() is really just adding descriptors to the
batch. Not doing any flush for most cases. IMHO, the name is a little
confusing.
Thanks,
Jacob
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation
2024-08-15 6:52 [PATCH v3 0/4] Batch IOTLB/dev-IOTLB invalidation Tina Zhang
` (3 preceding siblings ...)
2024-08-15 6:52 ` [PATCH v3 4/4] iommu/vt-d: Introduce batched cache invalidation Tina Zhang
@ 2024-09-02 2:32 ` Baolu Lu
4 siblings, 0 replies; 13+ messages in thread
From: Baolu Lu @ 2024-09-02 2:32 UTC (permalink / raw)
To: Tina Zhang, Kevin Tian; +Cc: baolu.lu, iommu, linux-kernel
On 8/15/24 2:52 PM, Tina Zhang wrote:
> IOTLB and dev-IOTLB invalidation operations are performance-critical.
> The current implementation in the VT-d driver submits these commands
> individually, leading to some inefficiencies due to the IOMMU
> programming and invalidation command processing overhead for each
> operation.
>
> This patch series enhances the efficiency of Queue Invalidation (QI)
> operations by adding support for batch processing. Microbenchmarks
> show that with a DSA device working in SVA, batching IOTLB and dev-IOTLB
> invalidations can decrease the time spent in qi_submit_sync()
> by roughly more than 800 cycles.
>
> Changelog
> v3:
> * Rebased on 6.11-rc3.
> * Updated commit messages which are revised by Baolu.
> * Dropped the refactoring quirk_extra_dev_tlb_flush() patch.
> * Added "Add qi_batch for dmar_domain" patch which is provided by Baolu.
>
> v2:
> * Rebased on 6.11-rc2.
> * Updated commit messages.
> * Added changes of refactoring IOTLB/Dev-IOTLB invalidation logic
> and quirk_extra_dev_tlb_flush() logic.
>
> v1:
> https://lore.kernel.org/linux-iommu/20240517003728.251115-1-tina.zhang@intel.com/
>
> Lu Baolu (1):
> iommu/vt-d: Add qi_batch for dmar_domain
>
> Tina Zhang (3):
> iommu/vt-d: Factor out invalidation descriptor composition
> iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching
> iommu/vt-d: Introduce batched cache invalidation
>
> drivers/iommu/intel/cache.c | 239 ++++++++++++++++++++++++++---------
> drivers/iommu/intel/dmar.c | 93 +-------------
> drivers/iommu/intel/iommu.c | 6 +-
> drivers/iommu/intel/iommu.h | 126 ++++++++++++++++++
> drivers/iommu/intel/nested.c | 1 +
> drivers/iommu/intel/svm.c | 5 +-
> 6 files changed, 316 insertions(+), 154 deletions(-)
Queued for v6.12-rc1.
Thanks,
baolu
^ permalink raw reply [flat|nested] 13+ messages in thread