* [PATCH v5 01/20] intel_iommu: Use the latest fault reasons defined by spec
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 02/20] intel_iommu: Make pasid entry type check accurate Zhenzhong Duan
` (19 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Yu Zhang, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
From: Yu Zhang <yu.c.zhang@linux.intel.com>
Spec revision 3.0 or above defines more detailed fault reasons for
scalable mode. So introduce them into emulation code, see spec
section 7.1.2 for details.
Note that the spec revision has no relation to the VERSION register; the
guest kernel should not use that register to judge which features are
supported. Instead, the cap/ecap bits should be checked.
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu_internal.h | 9 ++++++++-
hw/i386/intel_iommu.c | 25 ++++++++++++++++---------
2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 4323fc5d6d..a987023692 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
* request while disabled */
VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
- VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */
+ /* PASID directory entry access failure */
+ VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
+ /* The Present(P) field of pasid directory entry is 0 */
+ VTD_FR_PASID_DIR_ENTRY_P = 0x51,
+ VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
+ /* The Present(P) field of pasid table entry is 0 */
+ VTD_FR_PASID_ENTRY_P = 0x59,
+ VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */
/* Output address in the interrupt address range for scalable mode */
VTD_FR_SM_INTERRUPT_ADDR = 0x87,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4c0d1d7d47..67dc99cfdf 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
addr = pasid_dir_base + index * entry_size;
if (dma_memory_read(&address_space_memory, addr,
pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ACCESS_ERR;
}
pdire->val = le64_to_cpu(pdire->val);
@@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
dma_addr_t addr,
VTDPASIDEntry *pe)
{
+ uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
addr = addr + index * entry_size;
if (dma_memory_read(&address_space_memory, addr,
pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_TABLE_ACCESS_ERR;
}
for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
pe->val[i] = le64_to_cpu(pe->val[i]);
@@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
/* Do translation type check */
if (!vtd_pe_type_check(x86_iommu, pe)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
- if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
- return -VTD_FR_PASID_TABLE_INV;
+ pgtt = VTD_PE_GET_TYPE(pe);
+ if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
+ !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
return 0;
@@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
}
if (!vtd_pdire_present(&pdire)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ENTRY_P;
}
ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
@@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
}
if (!vtd_pe_present(pe)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_ENTRY_P;
}
return 0;
@@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
}
if (!vtd_pdire_present(&pdire)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ENTRY_P;
}
/*
@@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
- [VTD_FR_PASID_TABLE_INV] = false,
+ [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
+ [VTD_FR_PASID_DIR_ENTRY_P] = true,
+ [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
+ [VTD_FR_PASID_ENTRY_P] = true,
+ [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_MAX] = false,
};
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 02/20] intel_iommu: Make pasid entry type check accurate
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 01/20] intel_iommu: Use the latest fault reasons defined by spec Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 03/20] intel_iommu: Add a placeholder variable for scalable modern mode Zhenzhong Duan
` (18 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
When the guest configures Nested Translation (011b) or First-stage Translation
only (001b), the type check incorrectly passes.
Fail the type check in those cases, as their simulation isn't supported yet.
Fixes: fb43cf739e1 ("intel_iommu: scalable mode emulation")
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 67dc99cfdf..10b8425a6c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -759,20 +759,16 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
- case VTD_SM_PASID_ENTRY_FLT:
case VTD_SM_PASID_ENTRY_SLT:
- case VTD_SM_PASID_ENTRY_NESTED:
- break;
+ return true;
case VTD_SM_PASID_ENTRY_PT:
- if (!x86_iommu->pt_supported) {
- return false;
- }
- break;
+ return x86_iommu->pt_supported;
+ case VTD_SM_PASID_ENTRY_FLT:
+ case VTD_SM_PASID_ENTRY_NESTED:
default:
/* Unknown type */
return false;
}
- return true;
}
static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 03/20] intel_iommu: Add a placeholder variable for scalable modern mode
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 01/20] intel_iommu: Use the latest fault reasons defined by spec Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 02/20] intel_iommu: Make pasid entry type check accurate Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 04/20] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation Zhenzhong Duan
` (17 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
Add a new element scalable_modern in IntelIOMMUState to mark scalable
modern mode; this element will eventually be exposed as an intel_iommu
property.
For now, it's only a placeholder, used for the address width
compatibility check and to block host device passthrough until nesting
is supported.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
include/hw/i386/intel_iommu.h | 1 +
hw/i386/intel_iommu.c | 23 ++++++++++++++++++-----
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index d372cd396b..3d65bbbd56 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -262,6 +262,7 @@ struct IntelIOMMUState {
bool caching_mode; /* RO - is cap CM enabled? */
bool scalable_mode; /* RO - is Scalable Mode supported? */
+ bool scalable_modern; /* RO - is modern SM supported? */
bool snoop_control; /* RO - is SNP filed supported? */
dma_addr_t root; /* Current root table pointer */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 10b8425a6c..1d13916a98 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3918,7 +3918,13 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
return false;
}
- return true;
+ if (!s->scalable_modern) {
+ /* All checks requested by VTD non-modern mode pass */
+ return true;
+ }
+
+ error_setg(errp, "host device is unsupported in scalable modern mode yet");
+ return false;
}
static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
@@ -4308,14 +4314,21 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
}
}
- /* Currently only address widths supported are 39 and 48 bits */
- if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
- (s->aw_bits != VTD_HOST_AW_48BIT)) {
- error_setg(errp, "Supported values for aw-bits are: %d, %d",
+ if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
+ s->aw_bits != VTD_HOST_AW_48BIT) {
+ error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
+ s->scalable_mode ? "Scalable" : "Legacy",
VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
return false;
}
+ if (s->scalable_modern && s->aw_bits != VTD_HOST_AW_48BIT) {
+ error_setg(errp,
+ "Scalable modern mode: supported values for aw-bits is: %d",
+ VTD_HOST_AW_48BIT);
+ return false;
+ }
+
if (s->scalable_mode && !s->dma_drain) {
error_setg(errp, "Need to set dma_drain for scalable mode");
return false;
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 04/20] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (2 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 03/20] intel_iommu: Add a placeholder variable for scalable modern mode Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 05/20] intel_iommu: Rename slpte to pte Zhenzhong Duan
` (16 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
Per VT-d spec 4.1, 6.5.2.4, "Table 21. PASID-based-IOTLB Invalidation",
PASID-selective PASID-based iotlb invalidation will flush stage-2 iotlb
entries with matching domain id and pasid.
With scalable modern mode introduced, guest could send PASID-selective
PASID-based iotlb invalidation to flush either stage-1 or stage-2 entries.
While at it, remove the old IOTLB-related definitions, which were unused.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu_internal.h | 14 ++++--
hw/i386/intel_iommu.c | 85 +++++++++++++++++++++++++++++++++-
2 files changed, 93 insertions(+), 6 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index a987023692..48019e2005 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -404,11 +404,6 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
-#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK)
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
/* Mask for Device IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
@@ -443,6 +438,15 @@ typedef union VTDInvDesc VTDInvDesc;
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+/* Masks for PIOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4)
+#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1d13916a98..1b07146e23 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2692,6 +2692,83 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
return true;
}
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+ VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+
+ return ((entry->domain_id == info->domain_id) &&
+ (entry->pasid == info->pasid));
+}
+
+static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
+ uint16_t domain_id, uint32_t pasid)
+{
+ VTDIOTLBPageInvInfo info;
+ VTDAddressSpace *vtd_as;
+ VTDContextEntry ce;
+
+ info.domain_id = domain_id;
+ info.pasid = pasid;
+
+ vtd_iommu_lock(s);
+ g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
+ &info);
+ vtd_iommu_unlock(s);
+
+ QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
+ if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce) &&
+ domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+
+ if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
+ vtd_as->pasid != pasid) {
+ continue;
+ }
+
+ if (!s->scalable_modern) {
+ vtd_address_space_sync(vtd_as);
+ }
+ }
+ }
+}
+
+static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ uint16_t domain_id;
+ uint32_t pasid;
+ uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0,
+ VTD_INV_DESC_PIOTLB_RSVD_VAL1,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+ __func__, "piotlb inv")) {
+ return false;
+ }
+
+ domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
+ pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
+ switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) {
+ case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
+ vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
+ break;
+
+ case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
+ break;
+
+ default:
+ error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64" (type mismatch: 0x%llx)",
+ __func__, inv_desc->val[1], inv_desc->val[0],
+ inv_desc->val[0] & VTD_INV_DESC_IOTLB_G);
+ return false;
+ }
+ return true;
+}
+
static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
@@ -2810,6 +2887,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_PIOTLB:
+ trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
+ if (!vtd_process_piotlb_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
case VTD_INV_DESC_WAIT:
trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
if (!vtd_process_wait_desc(s, &inv_desc)) {
@@ -2837,7 +2921,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
* iommu driver) work, just return true is enough so far.
*/
case VTD_INV_DESC_PC:
- case VTD_INV_DESC_PIOTLB:
if (s->scalable_mode) {
break;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 05/20] intel_iommu: Rename slpte to pte
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (3 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 04/20] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 06/20] intel_iommu: Implement stage-1 translation Zhenzhong Duan
` (15 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Yi Sun, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
From: Yi Liu <yi.l.liu@intel.com>
Because we will support both FST (a.k.a. FLT) and SST (a.k.a. SLT) translation,
rename variables and functions from slpte to pte whenever possible.
Some are SST-only; those are renamed with an sl_ prefix.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Co-developed-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
hw/i386/intel_iommu_internal.h | 24 +++---
include/hw/i386/intel_iommu.h | 2 +-
hw/i386/intel_iommu.c | 129 +++++++++++++++++----------------
3 files changed, 78 insertions(+), 77 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 48019e2005..e810b0071f 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -533,24 +533,24 @@ typedef struct VTDRootEntry VTDRootEntry;
/* Second Level Page Translation Pointer*/
#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
-/* Paging Structure common */
-#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7)
-/* Bits to decide the offset for each level */
-#define VTD_SL_LEVEL_BITS 9
-
/* Second Level Paging Structure */
-#define VTD_SL_PML4_LEVEL 4
-#define VTD_SL_PDP_LEVEL 3
-#define VTD_SL_PD_LEVEL 2
-#define VTD_SL_PT_LEVEL 1
-#define VTD_SL_PT_ENTRY_NR 512
-
/* Masks for Second Level Paging Entry */
#define VTD_SL_RW_MASK 3ULL
#define VTD_SL_R 1ULL
#define VTD_SL_W (1ULL << 1)
-#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
#define VTD_SL_IGN_COM 0xbff0000000000000ULL
#define VTD_SL_TM (1ULL << 62)
+/* Common for both First Level and Second Level */
+#define VTD_PML4_LEVEL 4
+#define VTD_PDP_LEVEL 3
+#define VTD_PD_LEVEL 2
+#define VTD_PT_LEVEL 1
+#define VTD_PT_ENTRY_NR 512
+#define VTD_PT_PAGE_SIZE_MASK (1ULL << 7)
+#define VTD_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
+
+/* Bits to decide the offset for each level */
+#define VTD_LEVEL_BITS 9
+
#endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3d65bbbd56..100b1d7673 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -152,7 +152,7 @@ struct VTDIOTLBEntry {
uint64_t gfn;
uint16_t domain_id;
uint32_t pasid;
- uint64_t slpte;
+ uint64_t pte;
uint64_t mask;
uint8_t access_flags;
};
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1b07146e23..dc4c4415f7 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -48,7 +48,8 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
-#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
+#define VTD_PE_GET_SL_LEVEL(pe) \
+ (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
/*
* PCI bus number (or SID) is not reliable since the device is usaully
@@ -284,15 +285,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
}
/* The shift of an addr for a certain level of paging structure */
-static inline uint32_t vtd_slpt_level_shift(uint32_t level)
+static inline uint32_t vtd_pt_level_shift(uint32_t level)
{
assert(level != 0);
- return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
+ return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS;
}
-static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
+static inline uint64_t vtd_pt_level_page_mask(uint32_t level)
{
- return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
+ return ~((1ULL << vtd_pt_level_shift(level)) - 1);
}
static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
@@ -349,7 +350,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
- return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
+ return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
}
/* Must be called with IOMMU lock held */
@@ -360,7 +361,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
VTDIOTLBEntry *entry;
unsigned level;
- for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
+ for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) {
key.gfn = vtd_get_iotlb_gfn(addr, level);
key.level = level;
key.sid = source_id;
@@ -377,7 +378,7 @@ out:
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
- uint16_t domain_id, hwaddr addr, uint64_t slpte,
+ uint16_t domain_id, hwaddr addr, uint64_t pte,
uint8_t access_flags, uint32_t level,
uint32_t pasid)
{
@@ -385,7 +386,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
- trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
+ trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id);
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
trace_vtd_iotlb_reset("iotlb exceeds size limit");
vtd_reset_iotlb_locked(s);
@@ -393,9 +394,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->gfn = gfn;
entry->domain_id = domain_id;
- entry->slpte = slpte;
+ entry->pte = pte;
entry->access_flags = access_flags;
- entry->mask = vtd_slpt_level_page_mask(level);
+ entry->mask = vtd_pt_level_page_mask(level);
entry->pasid = pasid;
key->gfn = gfn;
@@ -710,32 +711,32 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
+static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw)
{
- return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
+ return pte & VTD_PT_BASE_ADDR_MASK(aw);
}
/* Whether the pte indicates the address of the page frame */
-static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
+static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level)
{
- return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
+ return level == VTD_PT_LEVEL || (pte & VTD_PT_PAGE_SIZE_MASK);
}
-/* Get the content of a spte located in @base_addr[@index] */
-static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
+/* Get the content of a pte located in @base_addr[@index] */
+static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index)
{
- uint64_t slpte;
+ uint64_t pte;
- assert(index < VTD_SL_PT_ENTRY_NR);
+ assert(index < VTD_PT_ENTRY_NR);
if (dma_memory_read(&address_space_memory,
- base_addr + index * sizeof(slpte),
- &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
- slpte = (uint64_t)-1;
- return slpte;
+ base_addr + index * sizeof(pte),
+ &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
+ pte = (uint64_t)-1;
+ return pte;
}
- slpte = le64_to_cpu(slpte);
- return slpte;
+ pte = le64_to_cpu(pte);
+ return pte;
}
/* Given an iova and the level of paging structure, return the offset
@@ -743,12 +744,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
*/
static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
- return (iova >> vtd_slpt_level_shift(level)) &
- ((1ULL << VTD_SL_LEVEL_BITS) - 1);
+ return (iova >> vtd_pt_level_shift(level)) &
+ ((1ULL << VTD_LEVEL_BITS) - 1);
}
/* Check Capability Register to see if the @level of page-table is supported */
-static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
+static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level)
{
return VTD_CAP_SAGAW_MASK & s->cap &
(1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
@@ -833,7 +834,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
pgtt = VTD_PE_GET_TYPE(pe);
if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
- !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
+ !vtd_is_sl_level_supported(s, VTD_PE_GET_SL_LEVEL(pe))) {
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
@@ -972,7 +973,7 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return VTD_PE_GET_LEVEL(&pe);
+ return VTD_PE_GET_SL_LEVEL(&pe);
}
return vtd_ce_get_level(ce);
@@ -1040,9 +1041,9 @@ static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
}
/* Return true if IOVA passes range check, otherwise false. */
-static inline bool vtd_iova_range_check(IntelIOMMUState *s,
- uint64_t iova, VTDContextEntry *ce,
- uint8_t aw, uint32_t pasid)
+static inline bool vtd_iova_sl_range_check(IntelIOMMUState *s,
+ uint64_t iova, VTDContextEntry *ce,
+ uint8_t aw, uint32_t pasid)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
@@ -1083,17 +1084,17 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
/*
* We should have caught a guest-mis-programmed level earlier,
- * via vtd_is_level_supported.
+ * via vtd_is_sl_level_supported.
*/
assert(level < VTD_SPTE_RSVD_LEN);
/*
- * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and
- * checked by vtd_is_last_slpte().
+ * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and
+ * checked by vtd_is_last_pte().
*/
assert(level);
- if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) &&
- (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) {
+ if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) &&
+ (slpte & VTD_PT_PAGE_SIZE_MASK)) {
/* large page */
rsvd_mask = vtd_spte_rsvd_large[level];
} else {
@@ -1119,7 +1120,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t access_right_check;
uint64_t xlat, size;
- if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) {
+ if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
"pasid=0x%" PRIx32 ")", __func__, iova, pasid);
return -VTD_FR_ADDR_BEYOND_MGAW;
@@ -1130,7 +1131,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
while (true) {
offset = vtd_iova_level_offset(iova, level);
- slpte = vtd_get_slpte(addr, offset);
+ slpte = vtd_get_pte(addr, offset);
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
@@ -1161,17 +1162,17 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
return -VTD_FR_PAGING_ENTRY_RSVD;
}
- if (vtd_is_last_slpte(slpte, level)) {
+ if (vtd_is_last_pte(slpte, level)) {
*slptep = slpte;
*slpte_level = level;
break;
}
- addr = vtd_get_slpte_addr(slpte, aw_bits);
+ addr = vtd_get_pte_addr(slpte, aw_bits);
level--;
}
- xlat = vtd_get_slpte_addr(*slptep, aw_bits);
- size = ~vtd_slpt_level_page_mask(level) + 1;
+ xlat = vtd_get_pte_addr(*slptep, aw_bits);
+ size = ~vtd_pt_level_page_mask(level) + 1;
/*
* From VT-d spec 3.14: Untranslated requests and translation
@@ -1322,14 +1323,14 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
trace_vtd_page_walk_level(addr, level, start, end);
- subpage_size = 1ULL << vtd_slpt_level_shift(level);
- subpage_mask = vtd_slpt_level_page_mask(level);
+ subpage_size = 1ULL << vtd_pt_level_shift(level);
+ subpage_mask = vtd_pt_level_page_mask(level);
while (iova < end) {
iova_next = (iova & subpage_mask) + subpage_size;
offset = vtd_iova_level_offset(iova, level);
- slpte = vtd_get_slpte(addr, offset);
+ slpte = vtd_get_pte(addr, offset);
if (slpte == (uint64_t)-1) {
trace_vtd_page_walk_skip_read(iova, iova_next);
@@ -1352,12 +1353,12 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
*/
entry_valid = read_cur | write_cur;
- if (!vtd_is_last_slpte(slpte, level) && entry_valid) {
+ if (!vtd_is_last_pte(slpte, level) && entry_valid) {
/*
* This is a valid PDE (or even bigger than PDE). We need
* to walk one further level.
*/
- ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
+ ret = vtd_page_walk_level(vtd_get_pte_addr(slpte, info->aw),
iova, MIN(iova_next, end), level - 1,
read_cur, write_cur, info);
} else {
@@ -1374,7 +1375,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
event.entry.addr_mask = ~subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
- event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
+ event.entry.translated_addr = vtd_get_pte_addr(slpte, info->aw);
event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP :
IOMMU_NOTIFIER_UNMAP;
ret = vtd_page_walk_one(&event, info);
@@ -1408,11 +1409,11 @@ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
uint32_t level = vtd_get_iova_level(s, ce, pasid);
- if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) {
+ if (!vtd_iova_sl_range_check(s, start, ce, info->aw, pasid)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) {
+ if (!vtd_iova_sl_range_check(s, end, ce, info->aw, pasid)) {
/* Fix end so that it reaches the maximum */
end = vtd_iova_limit(s, ce, info->aw, pasid);
}
@@ -1527,7 +1528,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
/* Check if the programming of context-entry is valid */
if (!s->root_scalable &&
- !vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
+ !vtd_is_sl_level_supported(s, vtd_ce_get_level(ce))) {
error_report_once("%s: invalid context entry: hi=%"PRIx64
", lo=%"PRIx64" (level %d not supported)",
__func__, ce->hi, ce->lo,
@@ -1897,7 +1898,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
VTDContextEntry ce;
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
- uint64_t slpte, page_mask;
+ uint64_t pte, page_mask;
uint32_t level, pasid = vtd_as->pasid;
uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
@@ -1918,13 +1919,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry = &vtd_as->context_cache_entry;
- /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */
+ /* Try to fetch pte from IOTLB, we don't need RID2PASID logic */
if (!rid2pasid) {
iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte,
iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
+ pte = iotlb_entry->pte;
access_flags = iotlb_entry->access_flags;
page_mask = iotlb_entry->mask;
goto out;
@@ -1996,20 +1997,20 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
- /* Try to fetch slpte form IOTLB for RID2PASID slow path */
+ /* Try to fetch pte from IOTLB for RID2PASID slow path */
if (rid2pasid) {
iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte,
iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
+ pte = iotlb_entry->pte;
access_flags = iotlb_entry->access_flags;
page_mask = iotlb_entry->mask;
goto out;
}
}
- ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
+ ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
&reads, &writes, s->aw_bits, pasid);
if (ret_fr) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
@@ -2017,14 +2018,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
goto error;
}
- page_mask = vtd_slpt_level_page_mask(level);
+ page_mask = vtd_pt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
- addr, slpte, access_flags, level, pasid);
+ addr, pte, access_flags, level, pasid);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
- entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
+ entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
entry->perm = access_flags;
return true;
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 06/20] intel_iommu: Implement stage-1 translation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (4 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 05/20] intel_iommu: Rename slpte to pte Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 07/20] intel_iommu: Check if the input address is canonical Zhenzhong Duan
` (14 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Yi Sun, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
From: Yi Liu <yi.l.liu@intel.com>
This adds stage-1 page table walking to support stage-1 only
translation in scalable modern mode.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Co-developed-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu_internal.h | 34 +++++++
hw/i386/intel_iommu.c | 158 ++++++++++++++++++++++++++++++++-
2 files changed, 188 insertions(+), 4 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e810b0071f..86d3354198 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -320,6 +320,15 @@ typedef enum VTDFaultReason {
VTD_FR_PASID_ENTRY_P = 0x59,
VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */
+ /* Fail to access a first-level paging entry (not FS_PML4E) */
+ VTD_FR_FS_PAGING_ENTRY_INV = 0x70,
+ VTD_FR_FS_PAGING_ENTRY_P = 0x71,
+ /* Non-zero reserved field in present first-stage paging entry */
+ VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72,
+ VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */
+ VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */
+ VTD_FR_SM_WRITE = 0x85, /* No write permission */
+
/* Output address in the interrupt address range for scalable mode */
VTD_FR_SM_INTERRUPT_ADDR = 0x87,
VTD_FR_MAX, /* Guard */
@@ -438,6 +447,22 @@ typedef union VTDInvDesc VTDInvDesc;
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+/* Rsvd field masks for fpte */
+#define VTD_FS_UPPER_IGNORED 0xfff0000000000000ULL
+#define VTD_FPTE_PAGE_L1_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L2_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L3_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L4_RSVD_MASK(aw) \
+ (0x80ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+
+#define VTD_FPTE_LPAGE_L2_RSVD_MASK(aw) \
+ (0x1fe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_LPAGE_L3_RSVD_MASK(aw) \
+ (0x3fffe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+
/* Masks for PIOTLB Invalidate Descriptor */
#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4)
@@ -530,6 +555,15 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */
#define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_FLPM 3ULL
+#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL)
+
+/* First Level Paging Structure */
+/* Masks for First Level Paging Entry */
+#define VTD_FL_P 1ULL
+#define VTD_FL_RW (1ULL << 1)
+#define VTD_FL_US (1ULL << 2)
+
/* Second Level Page Translation Pointer*/
#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dc4c4415f7..dbd64d608f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -48,6 +48,8 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
+#define VTD_PE_GET_FL_LEVEL(pe) \
+ (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM))
#define VTD_PE_GET_SL_LEVEL(pe) \
(2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
@@ -755,6 +757,11 @@ static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level)
(1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
}
+static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
+{
+ return level == VTD_PML4_LEVEL;
+}
+
/* Return true if check passed, otherwise false */
static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
VTDPASIDEntry *pe)
@@ -838,6 +845,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
+ if (pgtt == VTD_SM_PASID_ENTRY_FLT &&
+ !vtd_is_fl_level_supported(s, VTD_PE_GET_FL_LEVEL(pe))) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
+ }
+
return 0;
}
@@ -973,7 +985,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return VTD_PE_GET_SL_LEVEL(&pe);
+ if (s->scalable_modern) {
+ return VTD_PE_GET_FL_LEVEL(&pe);
+ } else {
+ return VTD_PE_GET_SL_LEVEL(&pe);
+ }
}
return vtd_ce_get_level(ce);
@@ -1060,7 +1076,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ if (s->scalable_modern) {
+ return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+ } else {
+ return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ }
}
return vtd_ce_get_slpt_base(ce);
@@ -1800,6 +1820,12 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
[VTD_FR_PASID_ENTRY_P] = true,
[VTD_FR_PASID_TABLE_ENTRY_INV] = true,
+ [VTD_FR_FS_PAGING_ENTRY_INV] = true,
+ [VTD_FR_FS_PAGING_ENTRY_P] = true,
+ [VTD_FR_FS_PAGING_ENTRY_RSVD] = true,
+ [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true,
+ [VTD_FR_FS_PAGING_ENTRY_US] = true,
+ [VTD_FR_SM_WRITE] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_MAX] = false,
};
@@ -1862,6 +1888,113 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+/*
+ * Rsvd field masks for fpte:
+ * vtd_fpte_rsvd 4k pages
+ * vtd_fpte_rsvd_large large pages
+ *
+ * We support only 4-level page tables.
+ */
+#define VTD_FPTE_RSVD_LEN 5
+static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN];
+static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN];
+
+static bool vtd_flpte_nonzero_rsvd(uint64_t flpte, uint32_t level)
+{
+ uint64_t rsvd_mask;
+
+ /*
+ * We should have caught a guest-mis-programmed level earlier,
+ * via vtd_is_fl_level_supported.
+ */
+ assert(level < VTD_FPTE_RSVD_LEN);
+ /*
+ * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and
+ * checked by vtd_is_last_pte().
+ */
+ assert(level);
+
+ if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) &&
+ (flpte & VTD_PT_PAGE_SIZE_MASK)) {
+ /* large page */
+ rsvd_mask = vtd_fpte_rsvd_large[level];
+ } else {
+ rsvd_mask = vtd_fpte_rsvd[level];
+ }
+
+ return flpte & rsvd_mask;
+}
+
+static inline bool vtd_flpte_present(uint64_t flpte)
+{
+ return !!(flpte & VTD_FL_P);
+}
+
+/*
+ * Given the @iova, get relevant @flptep. @flpte_level will be the last level
+ * of the translation, can be used for deciding the size of large page.
+ */
+static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint64_t iova, bool is_write,
+ uint64_t *flptep, uint32_t *flpte_level,
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
+{
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
+ uint32_t offset;
+ uint64_t flpte;
+
+ while (true) {
+ offset = vtd_iova_level_offset(iova, level);
+ flpte = vtd_get_pte(addr, offset);
+
+ if (flpte == (uint64_t)-1) {
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
+ /* Invalid programming of pasid-entry */
+ return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
+ } else {
+ return -VTD_FR_FS_PAGING_ENTRY_INV;
+ }
+ }
+
+ if (!vtd_flpte_present(flpte)) {
+ *reads = false;
+ *writes = false;
+ return -VTD_FR_FS_PAGING_ENTRY_P;
+ }
+
+ /* No emulated device supports supervisor privilege request yet */
+ if (!(flpte & VTD_FL_US)) {
+ *reads = false;
+ *writes = false;
+ return -VTD_FR_FS_PAGING_ENTRY_US;
+ }
+
+ *reads = true;
+ *writes = (*writes) && (flpte & VTD_FL_RW);
+ if (is_write && !(flpte & VTD_FL_RW)) {
+ return -VTD_FR_SM_WRITE;
+ }
+ if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+ error_report_once("%s: detected flpte reserved non-zero "
+ "iova=0x%" PRIx64 ", level=0x%" PRIx32
+ "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
+ __func__, iova, level, flpte, pasid);
+ return -VTD_FR_FS_PAGING_ENTRY_RSVD;
+ }
+
+ if (vtd_is_last_pte(flpte, level)) {
+ *flptep = flpte;
+ *flpte_level = level;
+ return 0;
+ }
+
+ addr = vtd_get_pte_addr(flpte, aw_bits);
+ level--;
+ }
+}
+
static void vtd_report_fault(IntelIOMMUState *s,
int err, bool is_fpd_set,
uint16_t source_id,
@@ -2010,8 +2143,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
}
- ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
- &reads, &writes, s->aw_bits, pasid);
+ if (s->scalable_modern && s->root_scalable) {
+ ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ } else {
+ ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ }
if (ret_fr) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
addr, is_write, pasid != PCI_NO_PASID, pasid);
@@ -4287,6 +4425,18 @@ static void vtd_init(IntelIOMMUState *s)
vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
x86_iommu->dt_supported && s->stale_tm);
+ /*
+ * Rsvd field masks for fpte
+ */
+ vtd_fpte_rsvd[0] = ~0ULL;
+ vtd_fpte_rsvd[1] = VTD_FPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[2] = VTD_FPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[3] = VTD_FPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[4] = VTD_FPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+ vtd_fpte_rsvd_large[2] = VTD_FPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd_large[3] = VTD_FPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
+
if (s->scalable_mode || s->snoop_control) {
vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 07/20] intel_iommu: Check if the input address is canonical
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (5 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 06/20] intel_iommu: Implement stage-1 translation Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range Zhenzhong Duan
` (13 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
From: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
First stage translation must fail if the address to translate is
not canonical.
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 +
hw/i386/intel_iommu.c | 23 +++++++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 86d3354198..3e7365dfff 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -326,6 +326,7 @@ typedef enum VTDFaultReason {
/* Non-zero reserved field in present first-stage paging entry */
VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72,
VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */
+ VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1 : Address for FS not canonical.*/
VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */
VTD_FR_SM_WRITE = 0x85, /* No write permission */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index dbd64d608f..4cc4d668fc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1824,6 +1824,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_FS_PAGING_ENTRY_P] = true,
[VTD_FR_FS_PAGING_ENTRY_RSVD] = true,
[VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true,
+ [VTD_FR_FS_NON_CANONICAL] = true,
[VTD_FR_FS_PAGING_ENTRY_US] = true,
[VTD_FR_SM_WRITE] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
@@ -1930,6 +1931,22 @@ static inline bool vtd_flpte_present(uint64_t flpte)
return !!(flpte & VTD_FL_P);
}
+/* Return true if IOVA is canonical, otherwise false. */
+static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova,
+ VTDContextEntry *ce, uint32_t pasid)
+{
+ uint64_t iova_limit = vtd_iova_limit(s, ce, s->aw_bits, pasid);
+ uint64_t upper_bits_mask = ~(iova_limit - 1);
+ uint64_t upper_bits = iova & upper_bits_mask;
+ bool msb = ((iova & (iova_limit >> 1)) != 0);
+
+ if (msb) {
+ return upper_bits == upper_bits_mask;
+ } else {
+ return !upper_bits;
+ }
+}
+
/*
* Given the @iova, get relevant @flptep. @flpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
@@ -1945,6 +1962,12 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t offset;
uint64_t flpte;
+ if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
+ error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
+ "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
+ return -VTD_FR_FS_NON_CANONICAL;
+ }
+
while (true) {
offset = vtd_iova_level_offset(iova, level);
flpte = vtd_get_pte(addr, offset);
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (6 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 07/20] intel_iommu: Check if the input address is canonical Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-13 6:55 ` CLEMENT MATHIEU--DRIF
2024-12-04 2:11 ` Jason Wang
2024-11-11 8:34 ` [PATCH v5 09/20] intel_iommu: Set accessed and dirty bits during stage-1 translation Zhenzhong Duan
` (12 subsequent siblings)
20 siblings, 2 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation
requests that result in an address in the interrupt range will be
blocked with condition code LGN.4 or SGN.8."
This applies to both stage-1 and stage-2 IOMMU page tables, so move the
check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so that the
stage-1 page table result is also checked.
While at it, update the comment with the correct section number.
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu.c | 48 ++++++++++++++++++++++---------------------
1 file changed, 25 insertions(+), 23 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4cc4d668fc..e651401db1 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1138,7 +1138,6 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
- uint64_t xlat, size;
if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
@@ -1191,28 +1190,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
level--;
}
- xlat = vtd_get_pte_addr(*slptep, aw_bits);
- size = ~vtd_pt_level_page_mask(level) + 1;
-
- /*
- * From VT-d spec 3.14: Untranslated requests and translation
- * requests that result in an address in the interrupt range will be
- * blocked with condition code LGN.4 or SGN.8.
- */
- if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
- xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
- return 0;
- } else {
- error_report_once("%s: xlat address is in interrupt range "
- "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
- "slpte=0x%" PRIx64 ", write=%d, "
- "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
- "pasid=0x%" PRIx32 ")",
- __func__, iova, level, slpte, is_write,
- xlat, size, pasid);
- return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
- -VTD_FR_INTERRUPT_ADDR;
- }
+ return 0;
}
typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private);
@@ -2064,6 +2042,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t access_flags;
bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
+ uint64_t xlat, size;
/*
* We have standalone memory region for interrupt addresses, we
@@ -2173,6 +2152,29 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
&reads, &writes, s->aw_bits, pasid);
}
+ if (!ret_fr) {
+ xlat = vtd_get_pte_addr(pte, s->aw_bits);
+ size = ~vtd_pt_level_page_mask(level) + 1;
+
+ /*
+ * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation
+ * requests that result in an address in the interrupt range will be
+ * blocked with condition code LGN.4 or SGN.8.
+ */
+ if ((xlat <= VTD_INTERRUPT_ADDR_LAST &&
+ xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) {
+ error_report_once("%s: xlat address is in interrupt range "
+ "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
+ "pte=0x%" PRIx64 ", write=%d, "
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
+ "pasid=0x%" PRIx32 ")",
+ __func__, addr, level, pte, is_write,
+ xlat, size, pasid);
+ ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
+ -VTD_FR_INTERRUPT_ADDR;
+ }
+ }
+
if (ret_fr) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
addr, is_write, pasid != PCI_NO_PASID, pasid);
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range
2024-11-11 8:34 ` [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range Zhenzhong Duan
@ 2024-11-13 6:55 ` CLEMENT MATHIEU--DRIF
2024-11-13 8:49 ` Duan, Zhenzhong
2024-12-04 2:11 ` Jason Wang
1 sibling, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-13 6:55 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Paolo Bonzini, Richard Henderson, Eduardo Habkost,
Marcel Apfelbaum
On 11/11/2024 09:34, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation
> requests that result in an address in the interrupt range will be
> blocked with condition code LGN.4 or SGN.8."
>
> This applies to both stage-1 and stage-2 IOMMU page table, move the
> check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so stage-1
> page table could also be checked.
>
> By this chance, update the comment with correct section number.
>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu.c | 48 ++++++++++++++++++++++---------------------
> 1 file changed, 25 insertions(+), 23 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 4cc4d668fc..e651401db1 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1138,7 +1138,6 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
> uint32_t offset;
> uint64_t slpte;
> uint64_t access_right_check;
> - uint64_t xlat, size;
>
> if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
> error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
> @@ -1191,28 +1190,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
> level--;
> }
>
> - xlat = vtd_get_pte_addr(*slptep, aw_bits);
> - size = ~vtd_pt_level_page_mask(level) + 1;
> -
> - /*
> - * From VT-d spec 3.14: Untranslated requests and translation
> - * requests that result in an address in the interrupt range will be
> - * blocked with condition code LGN.4 or SGN.8.
> - */
> - if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
> - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
> - return 0;
> - } else {
> - error_report_once("%s: xlat address is in interrupt range "
> - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
> - "slpte=0x%" PRIx64 ", write=%d, "
> - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
> - "pasid=0x%" PRIx32 ")",
> - __func__, iova, level, slpte, is_write,
> - xlat, size, pasid);
> - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
> - -VTD_FR_INTERRUPT_ADDR;
> - }
> + return 0;
> }
>
> typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private);
> @@ -2064,6 +2042,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> uint8_t access_flags;
> bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
> VTDIOTLBEntry *iotlb_entry;
> + uint64_t xlat, size;
>
> /*
> * We have standalone memory region for interrupt addresses, we
> @@ -2173,6 +2152,29 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
> &reads, &writes, s->aw_bits, pasid);
> }
> + if (!ret_fr) {
> + xlat = vtd_get_pte_addr(pte, s->aw_bits);
> + size = ~vtd_pt_level_page_mask(level) + 1;
> +
> + /*
> + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation
> + * requests that result in an address in the interrupt range will be
> + * blocked with condition code LGN.4 or SGN.8.
> + */
> + if ((xlat <= VTD_INTERRUPT_ADDR_LAST &&
> + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) {
> + error_report_once("%s: xlat address is in interrupt range "
> + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
> + "pte=0x%" PRIx64 ", write=%d, "
> + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
> + "pasid=0x%" PRIx32 ")",
> + __func__, addr, level, pte, is_write,
> + xlat, size, pasid);
Hi Zhenzhong,
Shouldn't we add the pgtt value to this trace as it can now be generated
by both FL and SL?
Thanks
cmd
> + ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
> + -VTD_FR_INTERRUPT_ADDR;
> + }
> + }
> +
> if (ret_fr) {
> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
> addr, is_write, pasid != PCI_NO_PASID, pasid);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* RE: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range
2024-11-13 6:55 ` CLEMENT MATHIEU--DRIF
@ 2024-11-13 8:49 ` Duan, Zhenzhong
2024-11-14 6:04 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-11-13 8:49 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Sent: Wednesday, November 13, 2024 2:56 PM
>Subject: Re: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with
>interrupt range
>
>
>On 11/11/2024 09:34, Zhenzhong Duan wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation
>> requests that result in an address in the interrupt range will be
>> blocked with condition code LGN.4 or SGN.8."
>>
>> This applies to both stage-1 and stage-2 IOMMU page table, move the
>> check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so stage-1
>> page table could also be checked.
>>
>> By this chance, update the comment with correct section number.
>>
>> Suggested-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu.c | 48 ++++++++++++++++++++++---------------------
>> 1 file changed, 25 insertions(+), 23 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 4cc4d668fc..e651401db1 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -1138,7 +1138,6 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s,
>VTDContextEntry *ce,
>> uint32_t offset;
>> uint64_t slpte;
>> uint64_t access_right_check;
>> - uint64_t xlat, size;
>>
>> if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
>> error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
>> @@ -1191,28 +1190,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s,
>VTDContextEntry *ce,
>> level--;
>> }
>>
>> - xlat = vtd_get_pte_addr(*slptep, aw_bits);
>> - size = ~vtd_pt_level_page_mask(level) + 1;
>> -
>> - /*
>> - * From VT-d spec 3.14: Untranslated requests and translation
>> - * requests that result in an address in the interrupt range will be
>> - * blocked with condition code LGN.4 or SGN.8.
>> - */
>> - if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
>> - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
>> - return 0;
>> - } else {
>> - error_report_once("%s: xlat address is in interrupt range "
>> - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
>> - "slpte=0x%" PRIx64 ", write=%d, "
>> - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
>> - "pasid=0x%" PRIx32 ")",
>> - __func__, iova, level, slpte, is_write,
>> - xlat, size, pasid);
>> - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
>> - -VTD_FR_INTERRUPT_ADDR;
>> - }
>> + return 0;
>> }
>>
>> typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void
>*private);
>> @@ -2064,6 +2042,7 @@ static bool
>vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>> uint8_t access_flags;
>> bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
>> VTDIOTLBEntry *iotlb_entry;
>> + uint64_t xlat, size;
>>
>> /*
>> * We have standalone memory region for interrupt addresses, we
>> @@ -2173,6 +2152,29 @@ static bool
>vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>> ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
>> &reads, &writes, s->aw_bits, pasid);
>> }
>> + if (!ret_fr) {
>> + xlat = vtd_get_pte_addr(pte, s->aw_bits);
>> + size = ~vtd_pt_level_page_mask(level) + 1;
>> +
>> + /*
>> + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation
>> + * requests that result in an address in the interrupt range will be
>> + * blocked with condition code LGN.4 or SGN.8.
>> + */
>> + if ((xlat <= VTD_INTERRUPT_ADDR_LAST &&
>> + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) {
>> + error_report_once("%s: xlat address is in interrupt range "
>> + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
>> + "pte=0x%" PRIx64 ", write=%d, "
>> + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
>> + "pasid=0x%" PRIx32 ")",
>> + __func__, addr, level, pte, is_write,
>> + xlat, size, pasid);
>
>Hi Zhenzhong,
>
>Shouldn't we add the pgtt value to this trace as it can now be generated
>by both FL and SL?
Hi Clement,
We don't always have a pgtt value to dump, e.g., when the vIOMMU is in legacy mode.
Meanwhile, there are other ways to get the pgtt when there is one, e.g., from the QEMU command line.
The pgtt is also unrelated to the error itself, so I'd prefer to skip dumping it to keep things simple.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range
2024-11-13 8:49 ` Duan, Zhenzhong
@ 2024-11-14 6:04 ` CLEMENT MATHIEU--DRIF
0 siblings, 0 replies; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-14 6:04 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
On 13/11/2024 09:49, Duan, Zhenzhong wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
>> -----Original Message-----
>> From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>> Sent: Wednesday, November 13, 2024 2:56 PM
>> Subject: Re: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with
>> interrupt range
>>
>>
>> On 11/11/2024 09:34, Zhenzhong Duan wrote:
>>> Caution: External email. Do not open attachments or click links, unless this
>> email comes from a known sender and you know the content is safe.
>>>
>>> Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation
>>> requests that result in an address in the interrupt range will be
>>> blocked with condition code LGN.4 or SGN.8."
>>>
>>> This applies to both stage-1 and stage-2 IOMMU page table, move the
>>> check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so stage-1
>>> page table could also be checked.
>>>
>>> By this chance, update the comment with correct section number.
>>>
>>> Suggested-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> hw/i386/intel_iommu.c | 48 ++++++++++++++++++++++---------------------
>>> 1 file changed, 25 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 4cc4d668fc..e651401db1 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -1138,7 +1138,6 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s,
>> VTDContextEntry *ce,
>>> uint32_t offset;
>>> uint64_t slpte;
>>> uint64_t access_right_check;
>>> - uint64_t xlat, size;
>>>
>>> if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
>>> error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
>>> @@ -1191,28 +1190,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s,
>> VTDContextEntry *ce,
>>> level--;
>>> }
>>>
>>> - xlat = vtd_get_pte_addr(*slptep, aw_bits);
>>> - size = ~vtd_pt_level_page_mask(level) + 1;
>>> -
>>> - /*
>>> - * From VT-d spec 3.14: Untranslated requests and translation
>>> - * requests that result in an address in the interrupt range will be
>>> - * blocked with condition code LGN.4 or SGN.8.
>>> - */
>>> - if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
>>> - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
>>> - return 0;
>>> - } else {
>>> - error_report_once("%s: xlat address is in interrupt range "
>>> - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
>>> - "slpte=0x%" PRIx64 ", write=%d, "
>>> - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
>>> - "pasid=0x%" PRIx32 ")",
>>> - __func__, iova, level, slpte, is_write,
>>> - xlat, size, pasid);
>>> - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
>>> - -VTD_FR_INTERRUPT_ADDR;
>>> - }
>>> + return 0;
>>> }
>>>
>>> typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void
>> *private);
>>> @@ -2064,6 +2042,7 @@ static bool
>> vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>> uint8_t access_flags;
>>> bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
>>> VTDIOTLBEntry *iotlb_entry;
>>> + uint64_t xlat, size;
>>>
>>> /*
>>> * We have standalone memory region for interrupt addresses, we
>>> @@ -2173,6 +2152,29 @@ static bool
>> vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>> ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
>>> &reads, &writes, s->aw_bits, pasid);
>>> }
>>> + if (!ret_fr) {
>>> + xlat = vtd_get_pte_addr(pte, s->aw_bits);
>>> + size = ~vtd_pt_level_page_mask(level) + 1;
>>> +
>>> + /*
>>> + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation
>>> + * requests that result in an address in the interrupt range will be
>>> + * blocked with condition code LGN.4 or SGN.8.
>>> + */
>>> + if ((xlat <= VTD_INTERRUPT_ADDR_LAST &&
>>> + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) {
>>> + error_report_once("%s: xlat address is in interrupt range "
>>> + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
>>> + "pte=0x%" PRIx64 ", write=%d, "
>>> + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
>>> + "pasid=0x%" PRIx32 ")",
>>> + __func__, addr, level, pte, is_write,
>>> + xlat, size, pasid);
>> Hi Zhenzhong,
>>
>> Shouldn't we add the pgtt value to this trace as it can now be generated
>> by both FL and SL?
> Hi Clement,
>
> We don't always have a pgtt value to dump, e.g., when vIOMMU is in legacy mode.
> Meanwhile we have other way to get pgtt if there is, e.g., from qemu cmdline.
> Pgtt is also unrelated to the error itself, so I'd like to skip pgtt dump to be a bit simple.
Hi,
pgtt is initialized just above and is set to SLT when the vIOMMU is in
legacy mode.
But it's fine, we can keep the patch as is!
Thanks
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
>
> Thanks
> Zhenzhong
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range
2024-11-11 8:34 ` [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range Zhenzhong Duan
2024-11-13 6:55 ` CLEMENT MATHIEU--DRIF
@ 2024-12-04 2:11 ` Jason Wang
1 sibling, 0 replies; 46+ messages in thread
From: Jason Wang @ 2024-12-04 2:11 UTC (permalink / raw)
To: Zhenzhong Duan
Cc: qemu-devel, alex.williamson, clg, eric.auger, mst, peterx, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Paolo Bonzini, Richard Henderson,
Eduardo Habkost, Marcel Apfelbaum
On Mon, Nov 11, 2024 at 4:38 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>
> Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation
> requests that result in an address in the interrupt range will be
> blocked with condition code LGN.4 or SGN.8."
>
> This applies to both stage-1 and stage-2 IOMMU page table, move the
> check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so stage-1
> page table could also be checked.
>
> By this chance, update the comment with correct section number.
>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 09/20] intel_iommu: Set accessed and dirty bits during stage-1 translation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (7 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 08/20] intel_iommu: Check stage-1 translation result with interrupt range Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 10/20] intel_iommu: Flush stage-1 cache in iotlb invalidation Zhenzhong Duan
` (11 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
From: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu_internal.h | 3 +++
hw/i386/intel_iommu.c | 25 ++++++++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 3e7365dfff..22dd3faf0c 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -332,6 +332,7 @@ typedef enum VTDFaultReason {
/* Output address in the interrupt address range for scalable mode */
VTD_FR_SM_INTERRUPT_ADDR = 0x87,
+ VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */
VTD_FR_MAX, /* Guard */
} VTDFaultReason;
@@ -564,6 +565,8 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_FL_P 1ULL
#define VTD_FL_RW (1ULL << 1)
#define VTD_FL_US (1ULL << 2)
+#define VTD_FL_A (1ULL << 5)
+#define VTD_FL_D (1ULL << 6)
/* Second Level Page Translation Pointer*/
#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e651401db1..5af61478ac 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1806,6 +1806,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_FS_PAGING_ENTRY_US] = true,
[VTD_FR_SM_WRITE] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
+ [VTD_FR_FS_BIT_UPDATE_FAILED] = true,
[VTD_FR_MAX] = false,
};
@@ -1925,6 +1926,20 @@ static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova,
}
}
+static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index,
+ uint64_t pte, uint64_t flag)
+{
+ if (pte & flag) {
+ return MEMTX_OK;
+ }
+ pte |= flag;
+ pte = cpu_to_le64(pte);
+ return dma_memory_write(&address_space_memory,
+ base_addr + index * sizeof(pte),
+ &pte, sizeof(pte),
+ MEMTXATTRS_UNSPECIFIED);
+}
+
/*
* Given the @iova, get relevant @flptep. @flpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
@@ -1938,7 +1953,7 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
- uint64_t flpte;
+ uint64_t flpte, flag_ad = VTD_FL_A;
if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
@@ -1985,6 +2000,14 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
return -VTD_FR_FS_PAGING_ENTRY_RSVD;
}
+ if (vtd_is_last_pte(flpte, level) && is_write) {
+ flag_ad |= VTD_FL_D;
+ }
+
+ if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) {
+ return -VTD_FR_FS_BIT_UPDATE_FAILED;
+ }
+
if (vtd_is_last_pte(flpte, level)) {
*flptep = flpte;
*flpte_level = level;
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 10/20] intel_iommu: Flush stage-1 cache in iotlb invalidation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (8 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 09/20] intel_iommu: Set accessed and dirty bits during stage-1 translation Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 11/20] intel_iommu: Process PASID-based " Zhenzhong Duan
` (10 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
According to spec, Page-Selective-within-Domain Invalidation (11b):
1. IOTLB entries caching second-stage mappings (PGTT=010b) or pass-through
(PGTT=100b) mappings associated with the specified domain-id and the
input-address range are invalidated.
2. IOTLB entries caching first-stage (PGTT=001b) or nested (PGTT=011b)
mapping associated with specified domain-id are invalidated.
So per the spec definition, Page-Selective-within-Domain Invalidation
needs to flush first-stage and nested cached IOTLB entries as well.
We don't support nested yet and pass-through mappings are never cached,
so the iotlb cache only contains first-stage and second-stage mappings.
Add a tag pgtt in VTDIOTLBEntry to mark PGTT type of the mapping and
invalidate entries based on PGTT type.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
include/hw/i386/intel_iommu.h | 1 +
hw/i386/intel_iommu.c | 27 +++++++++++++++++++++------
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 100b1d7673..13e8680b87 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -155,6 +155,7 @@ struct VTDIOTLBEntry {
uint64_t pte;
uint64_t mask;
uint8_t access_flags;
+ uint8_t pgtt;
};
/* VT-d Source-ID Qualifier types */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5af61478ac..4b0fb1f83d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -305,9 +305,21 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
- return (entry->domain_id == info->domain_id) &&
- (((entry->gfn & info->mask) == gfn) ||
- (entry->gfn == gfn_tlb));
+
+ if (entry->domain_id != info->domain_id) {
+ return false;
+ }
+
+ /*
+ * According to spec, IOTLB entries caching first-stage (PGTT=001b) or
+ * nested (PGTT=011b) mapping associated with specified domain-id are
+ * invalidated. Nested isn't supported yet, so only need to check 001b.
+ */
+ if (entry->pgtt == VTD_SM_PASID_ENTRY_FLT) {
+ return true;
+ }
+
+ return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb;
}
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
@@ -382,7 +394,7 @@ out:
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t pte,
uint8_t access_flags, uint32_t level,
- uint32_t pasid)
+ uint32_t pasid, uint8_t pgtt)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
@@ -400,6 +412,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->access_flags = access_flags;
entry->mask = vtd_pt_level_page_mask(level);
entry->pasid = pasid;
+ entry->pgtt = pgtt;
key->gfn = gfn;
key->sid = source_id;
@@ -2062,7 +2075,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
- uint8_t access_flags;
+ uint8_t access_flags, pgtt;
bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
uint64_t xlat, size;
@@ -2171,9 +2184,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
if (s->scalable_modern && s->root_scalable) {
ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
&reads, &writes, s->aw_bits, pasid);
+ pgtt = VTD_SM_PASID_ENTRY_FLT;
} else {
ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
&reads, &writes, s->aw_bits, pasid);
+ pgtt = VTD_SM_PASID_ENTRY_SLT;
}
if (!ret_fr) {
xlat = vtd_get_pte_addr(pte, s->aw_bits);
@@ -2207,7 +2222,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
page_mask = vtd_pt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
- addr, pte, access_flags, level, pasid);
+ addr, pte, access_flags, level, pasid, pgtt);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 11/20] intel_iommu: Process PASID-based iotlb invalidation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (9 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 10/20] intel_iommu: Flush stage-1 cache in iotlb invalidation Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 12/20] intel_iommu: Add an internal API to find an address space with PASID Zhenzhong Duan
` (9 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
PASID-based iotlb (piotlb) is used when walking the Intel
VT-d stage-1 page table.
This emulates the stage-1 page table iotlb invalidation requested
by a PASID-based IOTLB Invalidate Descriptor (P_IOTLB).
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
hw/i386/intel_iommu_internal.h | 3 +++
hw/i386/intel_iommu.c | 43 ++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 22dd3faf0c..5e4e563e62 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -471,6 +471,9 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4)
#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PIOTLB_AM(val) ((val) & 0x3fULL)
+#define VTD_INV_DESC_PIOTLB_IH(val) (((val) >> 6) & 0x1)
+#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL)
#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4b0fb1f83d..71ef5d741a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -322,6 +322,28 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb;
}
+static gboolean vtd_hash_remove_by_page_piotlb(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+ VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+ uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
+ uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
+
+ /*
+ * According to spec, PASID-based-IOTLB Invalidation in page granularity
+ * doesn't invalidate IOTLB entries caching second-stage (PGTT=010b)
+ * or pass-through (PGTT=100b) mappings. Nested isn't supported yet,
+ * so only need to check first-stage (PGTT=001b) mappings.
+ */
+ if (entry->pgtt != VTD_SM_PASID_ENTRY_FLT) {
+ return false;
+ }
+
+ return entry->domain_id == info->domain_id && entry->pasid == info->pasid &&
+ ((entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb);
+}
+
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
* IntelIOMMUState to 1. Must be called with IOMMU lock held.
*/
@@ -2937,11 +2959,29 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
}
}
+static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
+ uint32_t pasid, hwaddr addr, uint8_t am)
+{
+ VTDIOTLBPageInvInfo info;
+
+ info.domain_id = domain_id;
+ info.pasid = pasid;
+ info.addr = addr;
+ info.mask = ~((1 << am) - 1);
+
+ vtd_iommu_lock(s);
+ g_hash_table_foreach_remove(s->iotlb,
+ vtd_hash_remove_by_page_piotlb, &info);
+ vtd_iommu_unlock(s);
+}
+
static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
uint16_t domain_id;
uint32_t pasid;
+ hwaddr addr;
+ uint8_t am;
uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0,
VTD_INV_DESC_PIOTLB_RSVD_VAL1,
VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
@@ -2959,6 +2999,9 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
break;
case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
+ am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]);
+ addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]);
+ vtd_piotlb_page_invalidate(s, domain_id, pasid, addr, am);
break;
default:
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 12/20] intel_iommu: Add an internal API to find an address space with PASID
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (10 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 11/20] intel_iommu: Process PASID-based " Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 13/20] intel_iommu: Add support for PASID-based device IOTLB invalidation Zhenzhong Duan
` (8 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
From: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
This will be used to implement the device IOTLB invalidation.
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
hw/i386/intel_iommu.c | 38 +++++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 71ef5d741a..599d017b18 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -70,6 +70,11 @@ struct vtd_hiod_key {
uint8_t devfn;
};
+struct vtd_as_raw_key {
+ uint16_t sid;
+ uint32_t pasid;
+};
+
struct vtd_iotlb_key {
uint64_t gfn;
uint32_t pasid;
@@ -1859,29 +1864,32 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
-static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
- gpointer user_data)
+static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value,
+ gpointer user_data)
{
struct vtd_as_key *as_key = (struct vtd_as_key *)key;
- uint16_t target_sid = *(uint16_t *)user_data;
+ struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data;
uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
- return sid == target_sid;
+
+ return (as_key->pasid == target->pasid) && (sid == target->sid);
}
-static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
+ uint16_t sid,
+ uint32_t pasid)
{
- uint8_t bus_num = PCI_BUS_NUM(sid);
- VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
-
- if (vtd_as &&
- (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
- return vtd_as;
- }
+ struct vtd_as_raw_key key = {
+ .sid = sid,
+ .pasid = pasid
+ };
- vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid);
- s->vtd_as_cache[bus_num] = vtd_as;
+ return g_hash_table_find(s->vtd_address_spaces,
+ vtd_find_as_by_sid_and_pasid, &key);
+}
- return vtd_as;
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+ return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
}
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 13/20] intel_iommu: Add support for PASID-based device IOTLB invalidation
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (11 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 12/20] intel_iommu: Add an internal API to find an address space with PASID Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-12-04 3:27 ` Jason Wang
2024-11-11 8:34 ` [PATCH v5 14/20] intel_iommu: piotlb invalidation should notify unmap Zhenzhong Duan
` (7 subsequent siblings)
20 siblings, 1 reply; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Paolo Bonzini,
Richard Henderson, Eduardo Habkost, Marcel Apfelbaum
From: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 11 ++++++++
hw/i386/intel_iommu.c | 50 ++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 5e4e563e62..2c977aa7da 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -385,6 +385,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
#define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
#define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
+#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
/* Masks for Invalidation Wait Descriptor*/
@@ -426,6 +427,16 @@ typedef union VTDInvDesc VTDInvDesc;
/* Masks for Interrupt Entry Invalidate Descriptor */
#define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL
+/* Masks for PASID based Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \
+ 0xfffffffffffff000ULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) ((val >> 11) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xffffULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) ((val >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL
+
/* Rsvd field masks for spte */
#define VTD_SPTE_SNP 0x800ULL
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 599d017b18..f80d60c16e 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3075,6 +3075,49 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
}
+static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ uint16_t sid;
+ VTDAddressSpace *vtd_dev_as;
+ bool size;
+ bool global;
+ hwaddr addr;
+ uint32_t pasid;
+ uint64_t mask[4] = {VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0,
+ VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+ __func__, "device piotlb inv")) {
+ return false;
+ }
+
+ global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi);
+ size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi);
+ addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi);
+ sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
+ if (global) {
+ QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
+ if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
+ (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
+ vtd_dev_as->devfn) == sid)) {
+ do_invalidate_device_tlb(vtd_dev_as, size, addr);
+ }
+ }
+ } else {
+ pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo);
+ vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid);
+ if (!vtd_dev_as) {
+ return true;
+ }
+
+ do_invalidate_device_tlb(vtd_dev_as, size, addr);
+ }
+
+ return true;
+}
+
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
@@ -3161,6 +3204,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_DEV_PIOTLB:
+ trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_device_piotlb_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
case VTD_INV_DESC_DEVICE:
trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 13/20] intel_iommu: Add support for PASID-based device IOTLB invalidation
2024-11-11 8:34 ` [PATCH v5 13/20] intel_iommu: Add support for PASID-based device IOTLB invalidation Zhenzhong Duan
@ 2024-12-04 3:27 ` Jason Wang
0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2024-12-04 3:27 UTC (permalink / raw)
To: Zhenzhong Duan
Cc: qemu-devel, alex.williamson, clg, eric.auger, mst, peterx, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Paolo Bonzini, Richard Henderson,
Eduardo Habkost, Marcel Apfelbaum
On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>
> From: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
>
> Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 14/20] intel_iommu: piotlb invalidation should notify unmap
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (12 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 13/20] intel_iommu: Add support for PASID-based device IOTLB invalidation Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes Zhenzhong Duan
` (6 subsequent siblings)
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
This is used by some emulated devices which cache address
translation results. When a piotlb invalidation is issued in the guest,
those caches should be refreshed.
There is already a similar implementation in iotlb invalidation.
So update vtd_iotlb_page_invalidate_notify() to make it work
also for piotlb invalidation.
A device that does not implement the ATS capability, or disables
it, but still caches translation results should implement or
enable ATS if it needs to cache translation results.
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
---
hw/i386/intel_iommu.c | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f80d60c16e..b921793c3a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2450,8 +2450,13 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
}
}
+/*
+ * There is no pasid field in iotlb invalidation descriptor, so PCI_NO_PASID
+ * is passed as parameter. Piotlb invalidation supports pasid, pasid in its
+ * descriptor is passed which should not be PCI_NO_PASID.
+ */
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
- uint16_t domain_id, hwaddr addr,
+ uint16_t domain_id, hwaddr addr,
uint8_t am, uint32_t pasid)
{
VTDAddressSpace *vtd_as;
@@ -2460,19 +2465,37 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
- if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) {
- continue;
- }
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ uint32_t rid2pasid = PCI_NO_PASID;
+
+ if (s->root_scalable) {
+ rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+ }
+
+ /*
+ * In legacy mode, vtd_as->pasid == pasid is always true.
+ * In scalable mode, for vtd address space backing a PCI
+ * device without pasid, needs to compare pasid with
+ * rid2pasid of this device.
+ */
+ if (!(vtd_as->pasid == pasid ||
+ (vtd_as->pasid == PCI_NO_PASID && pasid == rid2pasid))) {
+ continue;
+ }
+
if (vtd_as_has_map_notifier(vtd_as)) {
/*
- * As long as we have MAP notifications registered in
- * any of our IOMMU notifiers, we need to sync the
- * shadow page table.
+ * In non-modern mode, as long as we have MAP notifications
+ * registered in any of our IOMMU notifiers, we need to
+ * sync the shadow page table. In scalable modern mode,
+ * VFIO device attaches to nested page table instead of
+ * shadow page table, so no need to sync.
*/
- vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
+ if (!s->scalable_modern || !s->root_scalable) {
+ vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
+ }
} else {
/*
* For UNMAP-only notifiers, we don't need to walk the
@@ -2960,7 +2983,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
continue;
}
- if (!s->scalable_modern) {
+ if (!s->scalable_modern || !vtd_as_has_map_notifier(vtd_as)) {
vtd_address_space_sync(vtd_as);
}
}
@@ -2981,6 +3004,8 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
g_hash_table_foreach_remove(s->iotlb,
vtd_hash_remove_by_page_piotlb, &info);
vtd_iommu_unlock(s);
+
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
}
static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (13 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 14/20] intel_iommu: piotlb invalidation should notify unmap Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-20 6:09 ` CLEMENT MATHIEU--DRIF
2024-12-04 3:27 ` Jason Wang
2024-11-11 8:34 ` [PATCH v5 16/20] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2 Zhenzhong Duan
` (5 subsequent siblings)
20 siblings, 2 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Igor Mammedov, Ani Sinha
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..46f80be9ca 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
/* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/x86/q35/DMAR.dmar",
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes
2024-11-11 8:34 ` [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes Zhenzhong Duan
@ 2024-11-20 6:09 ` CLEMENT MATHIEU--DRIF
2024-12-04 3:27 ` Jason Wang
1 sibling, 0 replies; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-20 6:09 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Igor Mammedov, Ani Sinha
Hi,
@Michael, are this patch and patch 17/20 ok for you?
Thanks,
cmd
On 11/11/2024 09:34, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> tests/qtest/bios-tables-test-allowed-diff.h | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
> index dfb8523c8b..46f80be9ca 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1 +1,2 @@
> /* List of comma-separated changed AML files to ignore */
> +"tests/data/acpi/x86/q35/DMAR.dmar",
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes
2024-11-11 8:34 ` [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes Zhenzhong Duan
2024-11-20 6:09 ` CLEMENT MATHIEU--DRIF
@ 2024-12-04 3:27 ` Jason Wang
1 sibling, 0 replies; 46+ messages in thread
From: Jason Wang @ 2024-12-04 3:27 UTC (permalink / raw)
To: Zhenzhong Duan
Cc: qemu-devel, alex.williamson, clg, eric.auger, mst, peterx, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Igor Mammedov, Ani Sinha
On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> tests/qtest/bios-tables-test-allowed-diff.h | 1 +
> 1 file changed, 1 insertion(+)
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 16/20] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (14 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 15/20] tests/acpi: q35: allow DMAR acpi table changes Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-12-04 3:28 ` Jason Wang
2024-11-11 8:34 ` [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR Zhenzhong Duan
` (4 subsequent siblings)
20 siblings, 1 reply; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
According to VTD spec, stage-1 page table could support 4-level and
5-level paging.
However, 5-level paging translation emulation is not supported yet.
That means the only supported value for aw_bits is 48. So default
aw_bits to 48 in scalable modern mode.
For legacy and scalable legacy modes, 48 is the default choice for
modern OS when both 48 and 39 are supported. So it makes sense to
set default to 48 for these two modes too starting from QEMU 9.2.
Use pc_compat_9_1 to handle the compatibility for machines before
9.2.
Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
include/hw/i386/intel_iommu.h | 2 +-
hw/i386/pc.c | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 13e8680b87..09ce707930 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -45,7 +45,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE)
#define DMAR_REG_SIZE 0x230
#define VTD_HOST_AW_39BIT 39
#define VTD_HOST_AW_48BIT 48
-#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT
+#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT
#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1)
#define DMAR_REPORT_F_INTR (1)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 830614d930..bdb67f1fd4 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -83,6 +83,7 @@ GlobalProperty pc_compat_9_1[] = {
{ "ICH9-LPC", "x-smi-swsmi-timer", "off" },
{ "ICH9-LPC", "x-smi-periodic-timer", "off" },
{ TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" },
+ { TYPE_INTEL_IOMMU_DEVICE, "aw-bits", "39" },
};
const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1);
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 16/20] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2
2024-11-11 8:34 ` [PATCH v5 16/20] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2 Zhenzhong Duan
@ 2024-12-04 3:28 ` Jason Wang
0 siblings, 0 replies; 46+ messages in thread
From: Jason Wang @ 2024-12-04 3:28 UTC (permalink / raw)
To: Zhenzhong Duan
Cc: qemu-devel, alex.williamson, clg, eric.auger, mst, peterx, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Marcel Apfelbaum, Paolo Bonzini,
Richard Henderson, Eduardo Habkost
On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>
> According to VTD spec, stage-1 page table could support 4-level and
> 5-level paging.
>
> However, 5-level paging translation emulation is unsupported yet.
> That means the only supported value for aw_bits is 48. So default
> aw_bits to 48 in scalable modern mode.
>
> For legacy and scalable legacy modes, 48 is the default choice for
> modern OS when both 48 and 39 are supported. So it makes sense to
> set default to 48 for these two modes too starting from QEMU 9.2.
> Use pc_compat_9_1 to handle the compatibility for machines before
> 9.2.
>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
> Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> ---
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (15 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 16/20] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2 Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-13 7:16 ` CLEMENT MATHIEU--DRIF
2024-11-11 8:34 ` [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode Zhenzhong Duan
` (3 subsequent siblings)
20 siblings, 1 reply; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Igor Mammedov, Ani Sinha
Differences:
@@ -1,39 +1,39 @@
/*
* Intel ACPI Component Architecture
* AML/ASL+ Disassembler version 20200925 (64-bit version)
* Copyright (c) 2000 - 2020 Intel Corporation
*
- * Disassembly of tests/data/acpi/x86/q35/DMAR.dmar, Mon Nov 11 15:31:18 2024
+ * Disassembly of /tmp/aml-SPJ4W2, Mon Nov 11 15:31:18 2024
*
* ACPI Data Table [DMAR]
*
* Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
*/
[000h 0000 4] Signature : "DMAR" [DMA Remapping table]
[004h 0004 4] Table Length : 00000078
[008h 0008 1] Revision : 01
-[009h 0009 1] Checksum : 15
+[009h 0009 1] Checksum : 0C
[00Ah 0010 6] Oem ID : "BOCHS "
[010h 0016 8] Oem Table ID : "BXPC "
[018h 0024 4] Oem Revision : 00000001
[01Ch 0028 4] Asl Compiler ID : "BXPC"
[020h 0032 4] Asl Compiler Revision : 00000001
-[024h 0036 1] Host Address Width : 26
+[024h 0036 1] Host Address Width : 2F
[025h 0037 1] Flags : 01
[026h 0038 10] Reserved : 00 00 00 00 00 00 00 00 00 00
[030h 0048 2] Subtable Type : 0000 [Hardware Unit Definition]
[032h 0050 2] Length : 0040
[034h 0052 1] Flags : 00
[035h 0053 1] Reserved : 00
[036h 0054 2] PCI Segment Number : 0000
[038h 0056 8] Register Base Address : 00000000FED90000
[040h 0064 1] Device Scope Type : 03 [IOAPIC Device]
[041h 0065 1] Entry Length : 08
[042h 0066 2] Reserved : 0000
[044h 0068 1] Enumeration ID : 00
[045h 0069 1] PCI Bus Number : FF
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 1 -
tests/data/acpi/x86/q35/DMAR.dmar | Bin 120 -> 120 bytes
2 files changed, 1 deletion(-)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index 46f80be9ca..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,2 +1 @@
/* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/x86/q35/DMAR.dmar",
diff --git a/tests/data/acpi/x86/q35/DMAR.dmar b/tests/data/acpi/x86/q35/DMAR.dmar
index 0dca6e68ad8a8ca5b981bcfbc745385a63e9f216..0c05976715c6f2f6ec46ef6d37790f86a392b5ea 100644
GIT binary patch
delta 21
ccmb=Z;BxVG460yYU|{5#$R)+7KT$Op05(qqk^lez
delta 21
ccmb=Z;BxVG460yYU|<xT$R)+7Hc>Sg05*ICk^lez
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR
2024-11-11 8:34 ` [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR Zhenzhong Duan
@ 2024-11-13 7:16 ` CLEMENT MATHIEU--DRIF
2024-11-13 8:50 ` Duan, Zhenzhong
0 siblings, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-13 7:16 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Igor Mammedov, Ani Sinha
Hi Zhenzhong,
Ack
>cmd
On 11/11/2024 09:34, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> Differences:
>
> @@ -1,39 +1,39 @@
> /*
> * Intel ACPI Component Architecture
> * AML/ASL+ Disassembler version 20200925 (64-bit version)
> * Copyright (c) 2000 - 2020 Intel Corporation
> *
> - * Disassembly of tests/data/acpi/x86/q35/DMAR.dmar, Mon Nov 11 15:31:18 2024
> + * Disassembly of /tmp/aml-SPJ4W2, Mon Nov 11 15:31:18 2024
> *
> * ACPI Data Table [DMAR]
> *
> * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
> */
>
> [000h 0000 4] Signature : "DMAR" [DMA Remapping table]
> [004h 0004 4] Table Length : 00000078
> [008h 0008 1] Revision : 01
> -[009h 0009 1] Checksum : 15
> +[009h 0009 1] Checksum : 0C
> [00Ah 0010 6] Oem ID : "BOCHS "
> [010h 0016 8] Oem Table ID : "BXPC "
> [018h 0024 4] Oem Revision : 00000001
> [01Ch 0028 4] Asl Compiler ID : "BXPC"
> [020h 0032 4] Asl Compiler Revision : 00000001
>
> -[024h 0036 1] Host Address Width : 26
> +[024h 0036 1] Host Address Width : 2F
> [025h 0037 1] Flags : 01
> [026h 0038 10] Reserved : 00 00 00 00 00 00 00 00 00 00
>
> [030h 0048 2] Subtable Type : 0000 [Hardware Unit Definition]
> [032h 0050 2] Length : 0040
>
> [034h 0052 1] Flags : 00
> [035h 0053 1] Reserved : 00
> [036h 0054 2] PCI Segment Number : 0000
> [038h 0056 8] Register Base Address : 00000000FED90000
>
> [040h 0064 1] Device Scope Type : 03 [IOAPIC Device]
> [041h 0065 1] Entry Length : 08
> [042h 0066 2] Reserved : 0000
> [044h 0068 1] Enumeration ID : 00
> [045h 0069 1] PCI Bus Number : FF
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> tests/qtest/bios-tables-test-allowed-diff.h | 1 -
> tests/data/acpi/x86/q35/DMAR.dmar | Bin 120 -> 120 bytes
> 2 files changed, 1 deletion(-)
>
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
> index 46f80be9ca..dfb8523c8b 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1,2 +1 @@
> /* List of comma-separated changed AML files to ignore */
> -"tests/data/acpi/x86/q35/DMAR.dmar",
> diff --git a/tests/data/acpi/x86/q35/DMAR.dmar b/tests/data/acpi/x86/q35/DMAR.dmar
> index 0dca6e68ad8a8ca5b981bcfbc745385a63e9f216..0c05976715c6f2f6ec46ef6d37790f86a392b5ea 100644
> GIT binary patch
> delta 21
> ccmb=Z;BxVG460yYU|{5#$R)+7KT$Op05(qqk^lez
>
> delta 21
> ccmb=Z;BxVG460yYU|<xT$R)+7Hc>Sg05*ICk^lez
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* RE: [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR
2024-11-13 7:16 ` CLEMENT MATHIEU--DRIF
@ 2024-11-13 8:50 ` Duan, Zhenzhong
0 siblings, 0 replies; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-11-13 8:50 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Igor Mammedov, Ani Sinha
Hi Clement,
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Sent: Wednesday, November 13, 2024 3:17 PM
>Subject: Re: [PATCH v5 17/20] tests/acpi: q35: Update host address width in
>DMAR
>
>Hi Zhenzhong,
>
>Ack
I presume you mean:
Acked-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Thanks
Zhenzhong
>
> >cmd
>
>
>On 11/11/2024 09:34, Zhenzhong Duan wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> Differences:
>>
>> @@ -1,39 +1,39 @@
>> /*
>> * Intel ACPI Component Architecture
>> * AML/ASL+ Disassembler version 20200925 (64-bit version)
>> * Copyright (c) 2000 - 2020 Intel Corporation
>> *
>> - * Disassembly of tests/data/acpi/x86/q35/DMAR.dmar, Mon Nov 11 15:31:18
>2024
>> + * Disassembly of /tmp/aml-SPJ4W2, Mon Nov 11 15:31:18 2024
>> *
>> * ACPI Data Table [DMAR]
>> *
>> * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
>> */
>>
>> [000h 0000 4] Signature : "DMAR" [DMA Remapping table]
>> [004h 0004 4] Table Length : 00000078
>> [008h 0008 1] Revision : 01
>> -[009h 0009 1] Checksum : 15
>> +[009h 0009 1] Checksum : 0C
>> [00Ah 0010 6] Oem ID : "BOCHS "
>> [010h 0016 8] Oem Table ID : "BXPC "
>> [018h 0024 4] Oem Revision : 00000001
>> [01Ch 0028 4] Asl Compiler ID : "BXPC"
>> [020h 0032 4] Asl Compiler Revision : 00000001
>>
>> -[024h 0036 1] Host Address Width : 26
>> +[024h 0036 1] Host Address Width : 2F
>> [025h 0037 1] Flags : 01
>> [026h 0038 10] Reserved : 00 00 00 00 00 00 00 00 00 00
>>
>> [030h 0048 2] Subtable Type : 0000 [Hardware Unit Definition]
>> [032h 0050 2] Length : 0040
>>
>> [034h 0052 1] Flags : 00
>> [035h 0053 1] Reserved : 00
>> [036h 0054 2] PCI Segment Number : 0000
>> [038h 0056 8] Register Base Address : 00000000FED90000
>>
>> [040h 0064 1] Device Scope Type : 03 [IOAPIC Device]
>> [041h 0065 1] Entry Length : 08
>> [042h 0066 2] Reserved : 0000
>> [044h 0068 1] Enumeration ID : 00
>> [045h 0069 1] PCI Bus Number : FF
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> tests/qtest/bios-tables-test-allowed-diff.h | 1 -
>> tests/data/acpi/x86/q35/DMAR.dmar | Bin 120 -> 120 bytes
>> 2 files changed, 1 deletion(-)
>>
>> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-
>test-allowed-diff.h
>> index 46f80be9ca..dfb8523c8b 100644
>> --- a/tests/qtest/bios-tables-test-allowed-diff.h
>> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
>> @@ -1,2 +1 @@
>> /* List of comma-separated changed AML files to ignore */
>> -"tests/data/acpi/x86/q35/DMAR.dmar",
>> diff --git a/tests/data/acpi/x86/q35/DMAR.dmar
>b/tests/data/acpi/x86/q35/DMAR.dmar
>> index
>0dca6e68ad8a8ca5b981bcfbc745385a63e9f216..0c05976715c6f2f6ec46ef6d377
>90f86a392b5ea 100644
>> GIT binary patch
>> delta 21
>> ccmb=Z;BxVG460yYU|{5#$R)+7KT$Op05(qqk^lez
>>
>> delta 21
>> ccmb=Z;BxVG460yYU|<xT$R)+7Hc>Sg05*ICk^lez
>>
>> --
>> 2.34.1
>>
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (16 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 17/20] tests/acpi: q35: Update host address width in DMAR Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-19 6:54 ` CLEMENT MATHIEU--DRIF
2024-12-04 3:34 ` Jason Wang
2024-11-11 8:34 ` [PATCH v5 19/20] intel_iommu: Introduce a property to control FS1GP cap bit setting Zhenzhong Duan
` (2 subsequent siblings)
20 siblings, 2 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation wants to simplify it with a new property "x-flts".
When enabled in scalable mode, first stage translation, also known as
scalable modern mode, is supported. When enabled in legacy mode, an error
is thrown.
With scalable modern mode exposed to the user, also make the pasid entry
check in vtd_pe_type_check() accurate.
Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 ++
hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2c977aa7da..e8b211e8b0 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
+#define VTD_ECAP_FLTS (1ULL << 47)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_FS1GP (1ULL << 56)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b921793c3a..a7a81aebee 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
}
/* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
- case VTD_SM_PASID_ENTRY_SLT:
- return true;
- case VTD_SM_PASID_ENTRY_PT:
- return x86_iommu->pt_supported;
case VTD_SM_PASID_ENTRY_FLT:
+ return !!(s->ecap & VTD_ECAP_FLTS);
+ case VTD_SM_PASID_ENTRY_SLT:
+ return !!(s->ecap & VTD_ECAP_SLTS);
case VTD_SM_PASID_ENTRY_NESTED:
+ /* Not support NESTED page table type yet */
+ return false;
+ case VTD_SM_PASID_ENTRY_PT:
+ return !!(s->ecap & VTD_ECAP_PT);
default:
/* Unknown type */
return false;
@@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
@@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
}
/* Do translation type check */
- if (!vtd_pe_type_check(x86_iommu, pe)) {
+ if (!vtd_pe_type_check(s, pe)) {
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
@@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+ DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
@@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
- if (s->scalable_mode) {
+ if (s->scalable_modern) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
+ s->cap |= VTD_CAP_FS1GP;
+ } else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
@@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
}
}
+ if (!s->scalable_mode && s->scalable_modern) {
+ error_setg(errp, "Legacy mode: not support x-flts=on");
+ return false;
+ }
+
if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
s->aw_bits != VTD_HOST_AW_48BIT) {
error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-11 8:34 ` [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode Zhenzhong Duan
@ 2024-11-19 6:54 ` CLEMENT MATHIEU--DRIF
2024-11-19 7:28 ` Duan, Zhenzhong
2024-12-04 3:34 ` Jason Wang
1 sibling, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-19 6:54 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Yi Sun, Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
Hi zhenzhong,
Just one comment but you can add Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
On 11/11/2024 09:34, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
> related to scalable mode translation, thus there are multiple combinations.
>
> This vIOMMU implementation wants to simplify it with a new property "x-flts".
> When enabled in scalable mode, first stage translation also known as scalable
> modern mode is supported. When enabled in legacy mode, throw out error.
>
> With scalable modern mode exposed to user, also accurate the pasid entry
> check in vtd_pe_type_check().
>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 ++
> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2c977aa7da..e8b211e8b0 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -195,6 +195,7 @@
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_SMTS (1ULL << 43)
> #define VTD_ECAP_SLTS (1ULL << 46)
> +#define VTD_ECAP_FLTS (1ULL << 47)
>
> /* CAP_REG */
> /* (offset >> 4) << 24 */
> @@ -211,6 +212,7 @@
> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
> #define VTD_CAP_DRAIN_READ (1ULL << 55)
> +#define VTD_CAP_FS1GP (1ULL << 56)
> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
> #define VTD_CAP_CM (1ULL << 7)
> #define VTD_PASID_ID_SHIFT 20
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index b921793c3a..a7a81aebee 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
> }
>
> /* Return true if check passed, otherwise false */
> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
> - VTDPASIDEntry *pe)
> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> {
> switch (VTD_PE_GET_TYPE(pe)) {
> - case VTD_SM_PASID_ENTRY_SLT:
> - return true;
> - case VTD_SM_PASID_ENTRY_PT:
> - return x86_iommu->pt_supported;
> case VTD_SM_PASID_ENTRY_FLT:
> + return !!(s->ecap & VTD_ECAP_FLTS);
> + case VTD_SM_PASID_ENTRY_SLT:
> + return !!(s->ecap & VTD_ECAP_SLTS);
> case VTD_SM_PASID_ENTRY_NESTED:
> + /* Not support NESTED page table type yet */
> + return false;
> + case VTD_SM_PASID_ENTRY_PT:
> + return !!(s->ecap & VTD_ECAP_PT);
> default:
> /* Unknown type */
> return false;
> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> uint8_t pgtt;
> uint32_t index;
> dma_addr_t entry_size;
> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>
> index = VTD_PASID_TABLE_INDEX(pasid);
> entry_size = VTD_PASID_ENTRY_SIZE;
> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> }
>
> /* Do translation type check */
> - if (!vtd_pe_type_check(x86_iommu, pe)) {
> + if (!vtd_pe_type_check(s, pe)) {
> return -VTD_FR_PASID_TABLE_ENTRY_INV;
> }
>
> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
> VTD_HOST_ADDRESS_WIDTH),
> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
> }
>
> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> - if (s->scalable_mode) {
> + if (s->scalable_modern) {
> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
> + s->cap |= VTD_CAP_FS1GP;
> + } else if (s->scalable_mode) {
> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> }
>
> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> }
> }
>
> + if (!s->scalable_mode && s->scalable_modern) {
> + error_setg(errp, "Legacy mode: not support x-flts=on");
The error message should be "x-flts=on not supported in legacy mode" or
even "x-flts is only available in scalable mode" as there is no FLT in
legacy mode
> + return false;
> + }
> +
> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
> s->aw_bits != VTD_HOST_AW_48BIT) {
> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* RE: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-19 6:54 ` CLEMENT MATHIEU--DRIF
@ 2024-11-19 7:28 ` Duan, Zhenzhong
2024-11-19 8:59 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-11-19 7:28 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
Hi Clement,
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
>scalable modern mode
>
>Hi zhenzhong,
>Just one comment but you can add Reviewed-by: Clément Mathieu--
>Drif<clement.mathieu--drif@eviden.com>
>
>
>On 11/11/2024 09:34, Zhenzhong Duan wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>> related to scalable mode translation, thus there are multiple combinations.
>>
>> This vIOMMU implementation wants to simplify it with a new property "x-flts".
>> When enabled in scalable mode, first stage translation also known as scalable
>> modern mode is supported. When enabled in legacy mode, throw out error.
>>
>> With scalable modern mode exposed to user, also accurate the pasid entry
>> check in vtd_pe_type_check().
>>
>> Suggested-by: Jason Wang <jasowang@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 2 ++
>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 2c977aa7da..e8b211e8b0 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -195,6 +195,7 @@
>> #define VTD_ECAP_PASID (1ULL << 40)
>> #define VTD_ECAP_SMTS (1ULL << 43)
>> #define VTD_ECAP_SLTS (1ULL << 46)
>> +#define VTD_ECAP_FLTS (1ULL << 47)
>>
>> /* CAP_REG */
>> /* (offset >> 4) << 24 */
>> @@ -211,6 +212,7 @@
>> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
>> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
>> #define VTD_CAP_DRAIN_READ (1ULL << 55)
>> +#define VTD_CAP_FS1GP (1ULL << 56)
>> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ |
>VTD_CAP_DRAIN_WRITE)
>> #define VTD_CAP_CM (1ULL << 7)
>> #define VTD_PASID_ID_SHIFT 20
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index b921793c3a..a7a81aebee 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -803,16 +803,18 @@ static inline bool
>vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
>> }
>>
>> /* Return true if check passed, otherwise false */
>> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
>> - VTDPASIDEntry *pe)
>> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>> {
>> switch (VTD_PE_GET_TYPE(pe)) {
>> - case VTD_SM_PASID_ENTRY_SLT:
>> - return true;
>> - case VTD_SM_PASID_ENTRY_PT:
>> - return x86_iommu->pt_supported;
>> case VTD_SM_PASID_ENTRY_FLT:
>> + return !!(s->ecap & VTD_ECAP_FLTS);
>> + case VTD_SM_PASID_ENTRY_SLT:
>> + return !!(s->ecap & VTD_ECAP_SLTS);
>> case VTD_SM_PASID_ENTRY_NESTED:
>> + /* Not support NESTED page table type yet */
>> + return false;
>> + case VTD_SM_PASID_ENTRY_PT:
>> + return !!(s->ecap & VTD_ECAP_PT);
>> default:
>> /* Unknown type */
>> return false;
>> @@ -861,7 +863,6 @@ static int
>vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>> uint8_t pgtt;
>> uint32_t index;
>> dma_addr_t entry_size;
>> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>
>> index = VTD_PASID_TABLE_INDEX(pasid);
>> entry_size = VTD_PASID_ENTRY_SIZE;
>> @@ -875,7 +876,7 @@ static int
>vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>> }
>>
>> /* Do translation type check */
>> - if (!vtd_pe_type_check(x86_iommu, pe)) {
>> + if (!vtd_pe_type_check(s, pe)) {
>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
>> }
>>
>> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
>> VTD_HOST_ADDRESS_WIDTH),
>> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode,
>FALSE),
>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode,
>FALSE),
>> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control,
>false),
>> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
>> }
>>
>> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>> - if (s->scalable_mode) {
>> + if (s->scalable_modern) {
>> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
>> + s->cap |= VTD_CAP_FS1GP;
>> + } else if (s->scalable_mode) {
>> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>> }
>>
>> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s,
>Error **errp)
>> }
>> }
>>
>> + if (!s->scalable_mode && s->scalable_modern) {
>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>The error message should be "x-flts=on not supported in legacy mode" or
>even "x-flts is only available in scalable mode" as there is no FLT in
>legacy mode
OK, will do.
But I'm not quite clear on the difference between
"Legacy mode: not support x-flts=on" and "x-flts=on not supported in legacy mode".
Is it because the latter looks more formal or the former is ambiguous?
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-19 7:28 ` Duan, Zhenzhong
@ 2024-11-19 8:59 ` CLEMENT MATHIEU--DRIF
2024-11-19 9:25 ` Duan, Zhenzhong
0 siblings, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-19 8:59 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
[-- Attachment #1: Type: text/plain, Size: 6328 bytes --]
On 19/11/2024 08:28, Duan, Zhenzhong wrote:
Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
Hi Clement,
-----Original Message-----
From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
scalable modern mode
Hi zhenzhong,
Just one comment but you can add Reviewed-by: Clément Mathieu--
Drif<clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
On 11/11/2024 09:34, Zhenzhong Duan wrote:
Caution: External email. Do not open attachments or click links, unless this
email comes from a known sender and you know the content is safe.
Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation wants to simplify it with a new property "x-flts".
When enabled in scalable mode, first stage translation also known as scalable
modern mode is supported. When enabled in legacy mode, throw out error.
With scalable modern mode exposed to user, also accurate the pasid entry
check in vtd_pe_type_check().
Suggested-by: Jason Wang <jasowang@redhat.com><mailto:jasowang@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com><mailto:yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com><mailto:yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com><mailto:zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 ++
hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2c977aa7da..e8b211e8b0 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
+#define VTD_ECAP_FLTS (1ULL << 47)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_FS1GP (1ULL << 56)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ |
VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b921793c3a..a7a81aebee 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -803,16 +803,18 @@ static inline bool
vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
}
/* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
- case VTD_SM_PASID_ENTRY_SLT:
- return true;
- case VTD_SM_PASID_ENTRY_PT:
- return x86_iommu->pt_supported;
case VTD_SM_PASID_ENTRY_FLT:
+ return !!(s->ecap & VTD_ECAP_FLTS);
+ case VTD_SM_PASID_ENTRY_SLT:
+ return !!(s->ecap & VTD_ECAP_SLTS);
case VTD_SM_PASID_ENTRY_NESTED:
+ /* Not support NESTED page table type yet */
+ return false;
+ case VTD_SM_PASID_ENTRY_PT:
+ return !!(s->ecap & VTD_ECAP_PT);
default:
/* Unknown type */
return false;
@@ -861,7 +863,6 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
@@ -875,7 +876,7 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
}
/* Do translation type check */
- if (!vtd_pe_type_check(x86_iommu, pe)) {
+ if (!vtd_pe_type_check(s, pe)) {
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
@@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode,
FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode,
FALSE),
+ DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control,
false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
@@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
- if (s->scalable_mode) {
+ if (s->scalable_modern) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
+ s->cap |= VTD_CAP_FS1GP;
+ } else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
@@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s,
Error **errp)
}
}
+ if (!s->scalable_mode && s->scalable_modern) {
+ error_setg(errp, "Legacy mode: not support x-flts=on");
The error message should be "x-flts=on not supported in legacy mode" or
even "x-flts is only available in scalable mode" as there is no FLT in
legacy mode
OK, will do.
But I'm not quite clear of the difference between
"Legacy mode: not support x-flts=on" and "x-flts=on not supported in legacy mode".
Is it because the later looks more formal or the former has ambiguity?
It's just because the latter looks more natural.
But I think the most appropriate would be: "x-flts only available in scalable mode", because the issue is about "availability", not "support".
Thanks
>cmd
Thanks
Zhenzhong
[-- Attachment #2: Type: text/html, Size: 8870 bytes --]
^ permalink raw reply related [flat|nested] 46+ messages in thread
* RE: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-19 8:59 ` CLEMENT MATHIEU--DRIF
@ 2024-11-19 9:25 ` Duan, Zhenzhong
2024-11-20 6:11 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-11-19 9:25 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
[-- Attachment #1: Type: text/plain, Size: 7395 bytes --]
Clear, will use "x-flts is only available in scalable mode". Thanks Clement.
From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
Sent: Tuesday, November 19, 2024 5:00 PM
To: Duan, Zhenzhong <zhenzhong.duan@intel.com>; qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com; clg@redhat.com; eric.auger@redhat.com; mst@redhat.com; peterx@redhat.com; jasowang@redhat.com; jgg@nvidia.com; nicolinc@nvidia.com; joao.m.martins@oracle.com; Tian, Kevin <kevin.tian@intel.com>; Liu, Yi L <yi.l.liu@intel.com>; Peng, Chao P <chao.p.peng@intel.com>; Yi Sun <yi.y.sun@linux.intel.com>; Marcel Apfelbaum <marcel.apfelbaum@gmail.com>; Paolo Bonzini <pbonzini@redhat.com>; Richard Henderson <richard.henderson@linaro.org>; Eduardo Habkost <eduardo@habkost.net>
Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
On 19/11/2024 08:28, Duan, Zhenzhong wrote:
Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
Hi Clement,
-----Original Message-----
From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
scalable modern mode
Hi zhenzhong,
Just one comment but you can add Reviewed-by: Clément Mathieu--
Drif<clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
On 11/11/2024 09:34, Zhenzhong Duan wrote:
Caution: External email. Do not open attachments or click links, unless this
email comes from a known sender and you know the content is safe.
Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation wants to simplify it with a new property "x-flts".
When enabled in scalable mode, first stage translation also known as scalable
modern mode is supported. When enabled in legacy mode, throw out error.
With scalable modern mode exposed to user, also accurate the pasid entry
check in vtd_pe_type_check().
Suggested-by: Jason Wang <jasowang@redhat.com><mailto:jasowang@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com><mailto:yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com><mailto:yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com><mailto:zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 ++
hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2c977aa7da..e8b211e8b0 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
+#define VTD_ECAP_FLTS (1ULL << 47)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_FS1GP (1ULL << 56)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ |
VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b921793c3a..a7a81aebee 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -803,16 +803,18 @@ static inline bool
vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
}
/* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
- case VTD_SM_PASID_ENTRY_SLT:
- return true;
- case VTD_SM_PASID_ENTRY_PT:
- return x86_iommu->pt_supported;
case VTD_SM_PASID_ENTRY_FLT:
+ return !!(s->ecap & VTD_ECAP_FLTS);
+ case VTD_SM_PASID_ENTRY_SLT:
+ return !!(s->ecap & VTD_ECAP_SLTS);
case VTD_SM_PASID_ENTRY_NESTED:
+ /* Not support NESTED page table type yet */
+ return false;
+ case VTD_SM_PASID_ENTRY_PT:
+ return !!(s->ecap & VTD_ECAP_PT);
default:
/* Unknown type */
return false;
@@ -861,7 +863,6 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
@@ -875,7 +876,7 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
}
/* Do translation type check */
- if (!vtd_pe_type_check(x86_iommu, pe)) {
+ if (!vtd_pe_type_check(s, pe)) {
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
@@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode,
FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode,
FALSE),
+ DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control,
false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
@@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
- if (s->scalable_mode) {
+ if (s->scalable_modern) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
+ s->cap |= VTD_CAP_FS1GP;
+ } else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
@@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s,
Error **errp)
}
}
+ if (!s->scalable_mode && s->scalable_modern) {
+ error_setg(errp, "Legacy mode: not support x-flts=on");
The error message should be "x-flts=on not supported in legacy mode" or
even "x-flts is only available in scalable mode" as there is no FLT in
legacy mode
OK, will do.
But I'm not quite clear of the difference between
"Legacy mode: not support x-flts=on" and "x-flts=on not supported in legacy mode".
Is it because the later looks more formal or the former has ambiguity?
It's just because the former looks more natural.
But I think the most appropriate would be : "x-flts only available in scalable mode" because the issue is about "availability", not "support" Thanks >cmd
Thanks
Zhenzhong
[-- Attachment #2: Type: text/html, Size: 17720 bytes --]
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-19 9:25 ` Duan, Zhenzhong
@ 2024-11-20 6:11 ` CLEMENT MATHIEU--DRIF
0 siblings, 0 replies; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-11-20 6:11 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
[-- Attachment #1: Type: text/plain, Size: 8267 bytes --]
ok, feel free to add my RB if this is the only change
Thanks
cmd
On 19/11/2024 10:25, Duan, Zhenzhong wrote:
Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
Clear, will use "x-flts is only available in scalable mode". Thanks Clement.
From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
Sent: Tuesday, November 19, 2024 5:00 PM
To: Duan, Zhenzhong <zhenzhong.duan@intel.com><mailto:zhenzhong.duan@intel.com>; qemu-devel@nongnu.org<mailto:qemu-devel@nongnu.org>
Cc: alex.williamson@redhat.com<mailto:alex.williamson@redhat.com>; clg@redhat.com<mailto:clg@redhat.com>; eric.auger@redhat.com<mailto:eric.auger@redhat.com>; mst@redhat.com<mailto:mst@redhat.com>; peterx@redhat.com<mailto:peterx@redhat.com>; jasowang@redhat.com<mailto:jasowang@redhat.com>; jgg@nvidia.com<mailto:jgg@nvidia.com>; nicolinc@nvidia.com<mailto:nicolinc@nvidia.com>; joao.m.martins@oracle.com<mailto:joao.m.martins@oracle.com>; Tian, Kevin <kevin.tian@intel.com><mailto:kevin.tian@intel.com>; Liu, Yi L <yi.l.liu@intel.com><mailto:yi.l.liu@intel.com>; Peng, Chao P <chao.p.peng@intel.com><mailto:chao.p.peng@intel.com>; Yi Sun <yi.y.sun@linux.intel.com><mailto:yi.y.sun@linux.intel.com>; Marcel Apfelbaum <marcel.apfelbaum@gmail.com><mailto:marcel.apfelbaum@gmail.com>; Paolo Bonzini <pbonzini@redhat.com><mailto:pbonzini@redhat.com>; Richard Henderson <richard.henderson@linaro.org><mailto:richard.henderson@linaro.org>; Eduardo Habkost <eduardo@habkost.net><mailto:eduardo@habkost.net>
Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
On 19/11/2024 08:28, Duan, Zhenzhong wrote:
Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
Hi Clement,
-----Original Message-----
From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
scalable modern mode
Hi zhenzhong,
Just one comment but you can add Reviewed-by: Clément Mathieu--
Drif<clement.mathieu--drif@eviden.com><mailto:clement.mathieu--drif@eviden.com>
On 11/11/2024 09:34, Zhenzhong Duan wrote:
Caution: External email. Do not open attachments or click links, unless this
email comes from a known sender and you know the content is safe.
Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
related to scalable mode translation, thus there are multiple combinations.
This vIOMMU implementation wants to simplify it with a new property "x-flts".
When enabled in scalable mode, first stage translation also known as scalable
modern mode is supported. When enabled in legacy mode, throw out error.
With scalable modern mode exposed to user, also accurate the pasid entry
check in vtd_pe_type_check().
Suggested-by: Jason Wang <jasowang@redhat.com><mailto:jasowang@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com><mailto:yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com><mailto:yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com><mailto:zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 ++
hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2c977aa7da..e8b211e8b0 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
+#define VTD_ECAP_FLTS (1ULL << 47)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_FS1GP (1ULL << 56)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ |
VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b921793c3a..a7a81aebee 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -803,16 +803,18 @@ static inline bool
vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
}
/* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
- case VTD_SM_PASID_ENTRY_SLT:
- return true;
- case VTD_SM_PASID_ENTRY_PT:
- return x86_iommu->pt_supported;
case VTD_SM_PASID_ENTRY_FLT:
+ return !!(s->ecap & VTD_ECAP_FLTS);
+ case VTD_SM_PASID_ENTRY_SLT:
+ return !!(s->ecap & VTD_ECAP_SLTS);
case VTD_SM_PASID_ENTRY_NESTED:
+ /* Not support NESTED page table type yet */
+ return false;
+ case VTD_SM_PASID_ENTRY_PT:
+ return !!(s->ecap & VTD_ECAP_PT);
default:
/* Unknown type */
return false;
@@ -861,7 +863,6 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
@@ -875,7 +876,7 @@ static int
vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
}
/* Do translation type check */
- if (!vtd_pe_type_check(x86_iommu, pe)) {
+ if (!vtd_pe_type_check(s, pe)) {
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
@@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode,
FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode,
FALSE),
+ DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control,
false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
@@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
- if (s->scalable_mode) {
+ if (s->scalable_modern) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
+ s->cap |= VTD_CAP_FS1GP;
+ } else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
@@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s,
Error **errp)
}
}
+ if (!s->scalable_mode && s->scalable_modern) {
+ error_setg(errp, "Legacy mode: not support x-flts=on");
The error message should be "x-flts=on not supported in legacy mode" or
even "x-flts is only available in scalable mode" as there is no FLT in
legacy mode
OK, will do.
But I'm not quite clear of the difference between
"Legacy mode: not support x-flts=on" and "x-flts=on not supported in legacy mode".
Is it because the later looks more formal or the former has ambiguity?
It's just because the former looks more natural.
But I think the most appropriate would be : "x-flts only available in scalable mode" because the issue is about "availability", not "support" Thanks >cmd
Thanks
Zhenzhong
[-- Attachment #2: Type: text/html, Size: 19853 bytes --]
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-11-11 8:34 ` [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode Zhenzhong Duan
2024-11-19 6:54 ` CLEMENT MATHIEU--DRIF
@ 2024-12-04 3:34 ` Jason Wang
2024-12-04 6:14 ` CLEMENT MATHIEU--DRIF
1 sibling, 1 reply; 46+ messages in thread
From: Jason Wang @ 2024-12-04 3:34 UTC (permalink / raw)
To: Zhenzhong Duan
Cc: qemu-devel, alex.williamson, clg, eric.auger, mst, peterx, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Yi Sun, Marcel Apfelbaum, Paolo Bonzini,
Richard Henderson, Eduardo Habkost
On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>
> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
> related to scalable mode translation, thus there are multiple combinations.
>
> This vIOMMU implementation wants to simplify it with a new property "x-flts".
> When enabled in scalable mode, first stage translation also known as scalable
> modern mode is supported. When enabled in legacy mode, throw out error.
>
> With scalable modern mode exposed to user, also accurate the pasid entry
> check in vtd_pe_type_check().
>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 ++
> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2c977aa7da..e8b211e8b0 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -195,6 +195,7 @@
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_SMTS (1ULL << 43)
> #define VTD_ECAP_SLTS (1ULL << 46)
> +#define VTD_ECAP_FLTS (1ULL << 47)
>
> /* CAP_REG */
> /* (offset >> 4) << 24 */
> @@ -211,6 +212,7 @@
> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
> #define VTD_CAP_DRAIN_READ (1ULL << 55)
> +#define VTD_CAP_FS1GP (1ULL << 56)
> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
> #define VTD_CAP_CM (1ULL << 7)
> #define VTD_PASID_ID_SHIFT 20
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index b921793c3a..a7a81aebee 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
> }
>
> /* Return true if check passed, otherwise false */
> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
> - VTDPASIDEntry *pe)
> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> {
> switch (VTD_PE_GET_TYPE(pe)) {
> - case VTD_SM_PASID_ENTRY_SLT:
> - return true;
> - case VTD_SM_PASID_ENTRY_PT:
> - return x86_iommu->pt_supported;
> case VTD_SM_PASID_ENTRY_FLT:
> + return !!(s->ecap & VTD_ECAP_FLTS);
> + case VTD_SM_PASID_ENTRY_SLT:
> + return !!(s->ecap & VTD_ECAP_SLTS);
> case VTD_SM_PASID_ENTRY_NESTED:
> + /* Not support NESTED page table type yet */
> + return false;
> + case VTD_SM_PASID_ENTRY_PT:
> + return !!(s->ecap & VTD_ECAP_PT);
> default:
> /* Unknown type */
> return false;
> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> uint8_t pgtt;
> uint32_t index;
> dma_addr_t entry_size;
> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>
> index = VTD_PASID_TABLE_INDEX(pasid);
> entry_size = VTD_PASID_ENTRY_SIZE;
> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> }
>
> /* Do translation type check */
> - if (!vtd_pe_type_check(x86_iommu, pe)) {
> + if (!vtd_pe_type_check(s, pe)) {
> return -VTD_FR_PASID_TABLE_ENTRY_INV;
> }
>
> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
> VTD_HOST_ADDRESS_WIDTH),
> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
> }
>
> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> - if (s->scalable_mode) {
> + if (s->scalable_modern) {
> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
> + s->cap |= VTD_CAP_FS1GP;
> + } else if (s->scalable_mode) {
> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> }
>
> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> }
> }
>
> + if (!s->scalable_mode && s->scalable_modern) {
> + error_setg(errp, "Legacy mode: not support x-flts=on");
This seems weird; should we say "scalable mode is needed for
scalable modern mode"?
> + return false;
> + }
> +
> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
> s->aw_bits != VTD_HOST_AW_48BIT) {
> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
> --
> 2.34.1
>
Thanks
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-04 3:34 ` Jason Wang
@ 2024-12-04 6:14 ` CLEMENT MATHIEU--DRIF
2024-12-09 3:13 ` Jason Wang
0 siblings, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-12-04 6:14 UTC (permalink / raw)
To: Jason Wang, Zhenzhong Duan
Cc: qemu-devel@nongnu.org, alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, mst@redhat.com, peterx@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
Yi Sun, Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
On 04/12/2024 04:34, Jason Wang wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>>
>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>> related to scalable mode translation, thus there are multiple combinations.
>>
>> This vIOMMU implementation wants to simplify it with a new property "x-flts".
>> When enabled in scalable mode, first stage translation also known as scalable
>> modern mode is supported. When enabled in legacy mode, throw out error.
>>
>> With scalable modern mode exposed to user, also accurate the pasid entry
>> check in vtd_pe_type_check().
>>
>> Suggested-by: Jason Wang <jasowang@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 2 ++
>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 2c977aa7da..e8b211e8b0 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -195,6 +195,7 @@
>> #define VTD_ECAP_PASID (1ULL << 40)
>> #define VTD_ECAP_SMTS (1ULL << 43)
>> #define VTD_ECAP_SLTS (1ULL << 46)
>> +#define VTD_ECAP_FLTS (1ULL << 47)
>>
>> /* CAP_REG */
>> /* (offset >> 4) << 24 */
>> @@ -211,6 +212,7 @@
>> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
>> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
>> #define VTD_CAP_DRAIN_READ (1ULL << 55)
>> +#define VTD_CAP_FS1GP (1ULL << 56)
>> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
>> #define VTD_CAP_CM (1ULL << 7)
>> #define VTD_PASID_ID_SHIFT 20
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index b921793c3a..a7a81aebee 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
>> }
>>
>> /* Return true if check passed, otherwise false */
>> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
>> - VTDPASIDEntry *pe)
>> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>> {
>> switch (VTD_PE_GET_TYPE(pe)) {
>> - case VTD_SM_PASID_ENTRY_SLT:
>> - return true;
>> - case VTD_SM_PASID_ENTRY_PT:
>> - return x86_iommu->pt_supported;
>> case VTD_SM_PASID_ENTRY_FLT:
>> + return !!(s->ecap & VTD_ECAP_FLTS);
>> + case VTD_SM_PASID_ENTRY_SLT:
>> + return !!(s->ecap & VTD_ECAP_SLTS);
>> case VTD_SM_PASID_ENTRY_NESTED:
>> + /* Not support NESTED page table type yet */
>> + return false;
>> + case VTD_SM_PASID_ENTRY_PT:
>> + return !!(s->ecap & VTD_ECAP_PT);
>> default:
>> /* Unknown type */
>> return false;
>> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>> uint8_t pgtt;
>> uint32_t index;
>> dma_addr_t entry_size;
>> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>
>> index = VTD_PASID_TABLE_INDEX(pasid);
>> entry_size = VTD_PASID_ENTRY_SIZE;
>> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>> }
>>
>> /* Do translation type check */
>> - if (!vtd_pe_type_check(x86_iommu, pe)) {
>> + if (!vtd_pe_type_check(s, pe)) {
>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
>> }
>>
>> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
>> VTD_HOST_ADDRESS_WIDTH),
>> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
>> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
>> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
>> }
>>
>> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>> - if (s->scalable_mode) {
>> + if (s->scalable_modern) {
>> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
>> + s->cap |= VTD_CAP_FS1GP;
>> + } else if (s->scalable_mode) {
>> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>> }
>>
>> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> }
>> }
>>
>> + if (!s->scalable_mode && s->scalable_modern) {
>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>
> This seems to be wired, should we say "scalable mode is needed for
> scalable modern mode"?
Hi Jason,
We agreed to use the following sentence: "x-flts is only available in
scalable mode"
Does it look good to you?
Thanks
cmd
>
>> + return false;
>> + }
>> +
>> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
>> s->aw_bits != VTD_HOST_AW_48BIT) {
>> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
>> --
>> 2.34.1
>>
>
> Thanks
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-04 6:14 ` CLEMENT MATHIEU--DRIF
@ 2024-12-09 3:13 ` Jason Wang
2024-12-09 6:14 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2024-12-09 3:13 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF
Cc: Zhenzhong Duan, qemu-devel@nongnu.org, alex.williamson@redhat.com,
clg@redhat.com, eric.auger@redhat.com, mst@redhat.com,
peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, chao.p.peng@intel.com, Yi Sun,
Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
<clement.mathieu--drif@eviden.com> wrote:
>
>
>
> On 04/12/2024 04:34, Jason Wang wrote:
> > Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
> >
> >
> > On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
> >>
> >> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
> >> related to scalable mode translation, thus there are multiple combinations.
> >>
> >> This vIOMMU implementation wants to simplify it with a new property "x-flts".
> >> When enabled in scalable mode, first stage translation also known as scalable
> >> modern mode is supported. When enabled in legacy mode, throw out error.
> >>
> >> With scalable modern mode exposed to user, also accurate the pasid entry
> >> check in vtd_pe_type_check().
> >>
> >> Suggested-by: Jason Wang <jasowang@redhat.com>
> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >> ---
> >> hw/i386/intel_iommu_internal.h | 2 ++
> >> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
> >> 2 files changed, 21 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> >> index 2c977aa7da..e8b211e8b0 100644
> >> --- a/hw/i386/intel_iommu_internal.h
> >> +++ b/hw/i386/intel_iommu_internal.h
> >> @@ -195,6 +195,7 @@
> >> #define VTD_ECAP_PASID (1ULL << 40)
> >> #define VTD_ECAP_SMTS (1ULL << 43)
> >> #define VTD_ECAP_SLTS (1ULL << 46)
> >> +#define VTD_ECAP_FLTS (1ULL << 47)
> >>
> >> /* CAP_REG */
> >> /* (offset >> 4) << 24 */
> >> @@ -211,6 +212,7 @@
> >> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
> >> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
> >> #define VTD_CAP_DRAIN_READ (1ULL << 55)
> >> +#define VTD_CAP_FS1GP (1ULL << 56)
> >> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
> >> #define VTD_CAP_CM (1ULL << 7)
> >> #define VTD_PASID_ID_SHIFT 20
> >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> >> index b921793c3a..a7a81aebee 100644
> >> --- a/hw/i386/intel_iommu.c
> >> +++ b/hw/i386/intel_iommu.c
> >> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
> >> }
> >>
> >> /* Return true if check passed, otherwise false */
> >> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
> >> - VTDPASIDEntry *pe)
> >> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> >> {
> >> switch (VTD_PE_GET_TYPE(pe)) {
> >> - case VTD_SM_PASID_ENTRY_SLT:
> >> - return true;
> >> - case VTD_SM_PASID_ENTRY_PT:
> >> - return x86_iommu->pt_supported;
> >> case VTD_SM_PASID_ENTRY_FLT:
> >> + return !!(s->ecap & VTD_ECAP_FLTS);
> >> + case VTD_SM_PASID_ENTRY_SLT:
> >> + return !!(s->ecap & VTD_ECAP_SLTS);
> >> case VTD_SM_PASID_ENTRY_NESTED:
> >> + /* Not support NESTED page table type yet */
> >> + return false;
> >> + case VTD_SM_PASID_ENTRY_PT:
> >> + return !!(s->ecap & VTD_ECAP_PT);
> >> default:
> >> /* Unknown type */
> >> return false;
> >> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >> uint8_t pgtt;
> >> uint32_t index;
> >> dma_addr_t entry_size;
> >> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> >>
> >> index = VTD_PASID_TABLE_INDEX(pasid);
> >> entry_size = VTD_PASID_ENTRY_SIZE;
> >> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >> }
> >>
> >> /* Do translation type check */
> >> - if (!vtd_pe_type_check(x86_iommu, pe)) {
> >> + if (!vtd_pe_type_check(s, pe)) {
> >> return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >> }
> >>
> >> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
> >> VTD_HOST_ADDRESS_WIDTH),
> >> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
> >> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> >> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
> >> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> >> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> >> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> >> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
> >> }
> >>
> >> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> >> - if (s->scalable_mode) {
> >> + if (s->scalable_modern) {
> >> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
> >> + s->cap |= VTD_CAP_FS1GP;
> >> + } else if (s->scalable_mode) {
> >> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> >> }
> >>
> >> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> >> }
> >> }
> >>
> >> + if (!s->scalable_mode && s->scalable_modern) {
> >> + error_setg(errp, "Legacy mode: not support x-flts=on");
> >
> > This seems to be wired, should we say "scalable mode is needed for
> > scalable modern mode"?
>
> Hi Jason,
>
> We agreed to use the following sentence: "x-flts is only available in
> scalable mode"
>
> Does it look goot to you?
Better, but if we add more features to scalable modern mode, we will
need to change the error message here.
Thanks
>
> Thanks
> cmd
>
> >
> >> + return false;
> >> + }
> >> +
> >> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
> >> s->aw_bits != VTD_HOST_AW_48BIT) {
> >> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
> >> --
> >> 2.34.1
> >>
> >
> > Thanks
> >
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-09 3:13 ` Jason Wang
@ 2024-12-09 6:14 ` CLEMENT MATHIEU--DRIF
2024-12-09 6:24 ` Jason Wang
0 siblings, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-12-09 6:14 UTC (permalink / raw)
To: Jason Wang
Cc: Zhenzhong Duan, qemu-devel@nongnu.org, alex.williamson@redhat.com,
clg@redhat.com, eric.auger@redhat.com, mst@redhat.com,
peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, chao.p.peng@intel.com, Yi Sun,
Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
On 09/12/2024 04:13, Jason Wang wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
> <clement.mathieu--drif@eviden.com> wrote:
>>
>>
>>
>> On 04/12/2024 04:34, Jason Wang wrote:
>>> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>>>
>>>
>>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>>>>
>>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>>>> related to scalable mode translation, thus there are multiple combinations.
>>>>
>>>> This vIOMMU implementation wants to simplify it with a new property "x-flts".
>>>> When enabled in scalable mode, first stage translation also known as scalable
>>>> modern mode is supported. When enabled in legacy mode, throw out error.
>>>>
>>>> With scalable modern mode exposed to user, also accurate the pasid entry
>>>> check in vtd_pe_type_check().
>>>>
>>>> Suggested-by: Jason Wang <jasowang@redhat.com>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>> hw/i386/intel_iommu_internal.h | 2 ++
>>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>>> index 2c977aa7da..e8b211e8b0 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -195,6 +195,7 @@
>>>> #define VTD_ECAP_PASID (1ULL << 40)
>>>> #define VTD_ECAP_SMTS (1ULL << 43)
>>>> #define VTD_ECAP_SLTS (1ULL << 46)
>>>> +#define VTD_ECAP_FLTS (1ULL << 47)
>>>>
>>>> /* CAP_REG */
>>>> /* (offset >> 4) << 24 */
>>>> @@ -211,6 +212,7 @@
>>>> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
>>>> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
>>>> #define VTD_CAP_DRAIN_READ (1ULL << 55)
>>>> +#define VTD_CAP_FS1GP (1ULL << 56)
>>>> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
>>>> #define VTD_CAP_CM (1ULL << 7)
>>>> #define VTD_PASID_ID_SHIFT 20
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index b921793c3a..a7a81aebee 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
>>>> }
>>>>
>>>> /* Return true if check passed, otherwise false */
>>>> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
>>>> - VTDPASIDEntry *pe)
>>>> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>>> {
>>>> switch (VTD_PE_GET_TYPE(pe)) {
>>>> - case VTD_SM_PASID_ENTRY_SLT:
>>>> - return true;
>>>> - case VTD_SM_PASID_ENTRY_PT:
>>>> - return x86_iommu->pt_supported;
>>>> case VTD_SM_PASID_ENTRY_FLT:
>>>> + return !!(s->ecap & VTD_ECAP_FLTS);
>>>> + case VTD_SM_PASID_ENTRY_SLT:
>>>> + return !!(s->ecap & VTD_ECAP_SLTS);
>>>> case VTD_SM_PASID_ENTRY_NESTED:
>>>> + /* Not support NESTED page table type yet */
>>>> + return false;
>>>> + case VTD_SM_PASID_ENTRY_PT:
>>>> + return !!(s->ecap & VTD_ECAP_PT);
>>>> default:
>>>> /* Unknown type */
>>>> return false;
>>>> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>> uint8_t pgtt;
>>>> uint32_t index;
>>>> dma_addr_t entry_size;
>>>> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>>>
>>>> index = VTD_PASID_TABLE_INDEX(pasid);
>>>> entry_size = VTD_PASID_ENTRY_SIZE;
>>>> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>> }
>>>>
>>>> /* Do translation type check */
>>>> - if (!vtd_pe_type_check(x86_iommu, pe)) {
>>>> + if (!vtd_pe_type_check(s, pe)) {
>>>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>> }
>>>>
>>>> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
>>>> VTD_HOST_ADDRESS_WIDTH),
>>>> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
>>>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
>>>> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
>>>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
>>>> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>>>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>>>> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
>>>> }
>>>>
>>>> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>>>> - if (s->scalable_mode) {
>>>> + if (s->scalable_modern) {
>>>> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
>>>> + s->cap |= VTD_CAP_FS1GP;
>>>> + } else if (s->scalable_mode) {
>>>> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>>>> }
>>>>
>>>> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>>> }
>>>> }
>>>>
>>>> + if (!s->scalable_mode && s->scalable_modern) {
>>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>>>
>>> This seems to be wired, should we say "scalable mode is needed for
>>> scalable modern mode"?
>>
>> Hi Jason,
>>
>> We agreed to use the following sentence: "x-flts is only available in
>> scalable mode"
>>
>> Does it look goot to you?
>
> Better but if we add more features to the scalable modern, we need to
> change the error message here.
Hi Jason
Maybe the weirdness comes from the fact that x-flts on the command line
is mapped to scalable_modern in the code?
Thanks
>cmd
>
> Thanks
>
>>
>> Thanks
>> cmd
>>
>>>
>>>> + return false;
>>>> + }
>>>> +
>>>> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
>>>> s->aw_bits != VTD_HOST_AW_48BIT) {
>>>> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
>>>> --
>>>> 2.34.1
>>>>
>>>
>>> Thanks
>>>
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-09 6:14 ` CLEMENT MATHIEU--DRIF
@ 2024-12-09 6:24 ` Jason Wang
2024-12-09 6:42 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2024-12-09 6:24 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF
Cc: Zhenzhong Duan, qemu-devel@nongnu.org, alex.williamson@redhat.com,
clg@redhat.com, eric.auger@redhat.com, mst@redhat.com,
peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, chao.p.peng@intel.com, Yi Sun,
Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
On Mon, Dec 9, 2024 at 2:15 PM CLEMENT MATHIEU--DRIF
<clement.mathieu--drif@eviden.com> wrote:
>
>
>
> On 09/12/2024 04:13, Jason Wang wrote:
> > Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
> >
> >
> > On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
> > <clement.mathieu--drif@eviden.com> wrote:
> >>
> >>
> >>
> >> On 04/12/2024 04:34, Jason Wang wrote:
> >>> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
> >>>
> >>>
> >>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
> >>>>
> >>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
> >>>> related to scalable mode translation, thus there are multiple combinations.
> >>>>
> >>>> This vIOMMU implementation wants to simplify it with a new property "x-flts".
> >>>> When enabled in scalable mode, first stage translation also known as scalable
> >>>> modern mode is supported. When enabled in legacy mode, throw out error.
> >>>>
> >>>> With scalable modern mode exposed to user, also accurate the pasid entry
> >>>> check in vtd_pe_type_check().
> >>>>
> >>>> Suggested-by: Jason Wang <jasowang@redhat.com>
> >>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> >>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >>>> ---
> >>>> hw/i386/intel_iommu_internal.h | 2 ++
> >>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
> >>>> 2 files changed, 21 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> >>>> index 2c977aa7da..e8b211e8b0 100644
> >>>> --- a/hw/i386/intel_iommu_internal.h
> >>>> +++ b/hw/i386/intel_iommu_internal.h
> >>>> @@ -195,6 +195,7 @@
> >>>> #define VTD_ECAP_PASID (1ULL << 40)
> >>>> #define VTD_ECAP_SMTS (1ULL << 43)
> >>>> #define VTD_ECAP_SLTS (1ULL << 46)
> >>>> +#define VTD_ECAP_FLTS (1ULL << 47)
> >>>>
> >>>> /* CAP_REG */
> >>>> /* (offset >> 4) << 24 */
> >>>> @@ -211,6 +212,7 @@
> >>>> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
> >>>> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
> >>>> #define VTD_CAP_DRAIN_READ (1ULL << 55)
> >>>> +#define VTD_CAP_FS1GP (1ULL << 56)
> >>>> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
> >>>> #define VTD_CAP_CM (1ULL << 7)
> >>>> #define VTD_PASID_ID_SHIFT 20
> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> >>>> index b921793c3a..a7a81aebee 100644
> >>>> --- a/hw/i386/intel_iommu.c
> >>>> +++ b/hw/i386/intel_iommu.c
> >>>> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
> >>>> }
> >>>>
> >>>> /* Return true if check passed, otherwise false */
> >>>> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
> >>>> - VTDPASIDEntry *pe)
> >>>> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> >>>> {
> >>>> switch (VTD_PE_GET_TYPE(pe)) {
> >>>> - case VTD_SM_PASID_ENTRY_SLT:
> >>>> - return true;
> >>>> - case VTD_SM_PASID_ENTRY_PT:
> >>>> - return x86_iommu->pt_supported;
> >>>> case VTD_SM_PASID_ENTRY_FLT:
> >>>> + return !!(s->ecap & VTD_ECAP_FLTS);
> >>>> + case VTD_SM_PASID_ENTRY_SLT:
> >>>> + return !!(s->ecap & VTD_ECAP_SLTS);
> >>>> case VTD_SM_PASID_ENTRY_NESTED:
> >>>> + /* Not support NESTED page table type yet */
> >>>> + return false;
> >>>> + case VTD_SM_PASID_ENTRY_PT:
> >>>> + return !!(s->ecap & VTD_ECAP_PT);
> >>>> default:
> >>>> /* Unknown type */
> >>>> return false;
> >>>> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >>>> uint8_t pgtt;
> >>>> uint32_t index;
> >>>> dma_addr_t entry_size;
> >>>> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> >>>>
> >>>> index = VTD_PASID_TABLE_INDEX(pasid);
> >>>> entry_size = VTD_PASID_ENTRY_SIZE;
> >>>> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >>>> }
> >>>>
> >>>> /* Do translation type check */
> >>>> - if (!vtd_pe_type_check(x86_iommu, pe)) {
> >>>> + if (!vtd_pe_type_check(s, pe)) {
> >>>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >>>> }
> >>>>
> >>>> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
> >>>> VTD_HOST_ADDRESS_WIDTH),
> >>>> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
> >>>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> >>>> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
> >>>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> >>>> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> >>>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> >>>> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
> >>>> }
> >>>>
> >>>> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
> >>>> - if (s->scalable_mode) {
> >>>> + if (s->scalable_modern) {
> >>>> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
> >>>> + s->cap |= VTD_CAP_FS1GP;
> >>>> + } else if (s->scalable_mode) {
> >>>> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> >>>> }
> >>>>
> >>>> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> >>>> }
> >>>> }
> >>>>
> >>>> + if (!s->scalable_mode && s->scalable_modern) {
> >>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
> >>>
> >>> This seems to be wired, should we say "scalable mode is needed for
> >>> scalable modern mode"?
> >>
> >> Hi Jason,
> >>
> >> We agreed to use the following sentence: "x-flts is only available in
> >> scalable mode"
> >>
> >> Does it look goot to you?
> >
> > Better but if we add more features to the scalable modern, we need to
> > change the error message here.
>
> Hi Jason
>
> Maybe the weirdness comes from the fact that x-flts on the command line
> is mapped to scalable_modern in the code?
Yes, actually the code checks whether scalable mode is enabled when
scalable modern is enabled. But this is inconsistent with the error
message (though x-flts was probably implied there).
Thanks
>
> Thanks
> >cmd
>
> >
> > Thanks
> >
> >>
> >> Thanks
> >> cmd
> >>
> >>>
> >>>> + return false;
> >>>> + }
> >>>> +
> >>>> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
> >>>> s->aw_bits != VTD_HOST_AW_48BIT) {
> >>>> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
> >>>> --
> >>>> 2.34.1
> >>>>
> >>>
> >>> Thanks
> >>>
> >
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-09 6:24 ` Jason Wang
@ 2024-12-09 6:42 ` CLEMENT MATHIEU--DRIF
2024-12-11 2:22 ` Duan, Zhenzhong
0 siblings, 1 reply; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-12-09 6:42 UTC (permalink / raw)
To: Jason Wang
Cc: Zhenzhong Duan, qemu-devel@nongnu.org, alex.williamson@redhat.com,
clg@redhat.com, eric.auger@redhat.com, mst@redhat.com,
peterx@redhat.com, jgg@nvidia.com, nicolinc@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, chao.p.peng@intel.com, Yi Sun,
Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
Eduardo Habkost
On 09/12/2024 07:24, Jason Wang wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> On Mon, Dec 9, 2024 at 2:15 PM CLEMENT MATHIEU--DRIF
> <clement.mathieu--drif@eviden.com> wrote:
>>
>>
>> On 09/12/2024 04:13, Jason Wang wrote:
>>> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>>>
>>>
>>> On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
>>> <clement.mathieu--drif@eviden.com> wrote:
>>>>
>>>>
>>>> On 04/12/2024 04:34, Jason Wang wrote:
>>>>> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>>>>>
>>>>>
>>>>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan <zhenzhong.duan@intel.com> wrote:
>>>>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>>>>>> related to scalable mode translation, thus there are multiple combinations.
>>>>>>
>>>>>> This vIOMMU implementation wants to simplify it with a new property "x-flts".
>>>>>> When enabled in scalable mode, first stage translation also known as scalable
>>>>>> modern mode is supported. When enabled in legacy mode, throw out error.
>>>>>>
>>>>>> With scalable modern mode exposed to user, also accurate the pasid entry
>>>>>> check in vtd_pe_type_check().
>>>>>>
>>>>>> Suggested-by: Jason Wang <jasowang@redhat.com>
>>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>> ---
>>>>>> hw/i386/intel_iommu_internal.h | 2 ++
>>>>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>>>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>>>>> index 2c977aa7da..e8b211e8b0 100644
>>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>>>> @@ -195,6 +195,7 @@
>>>>>> #define VTD_ECAP_PASID (1ULL << 40)
>>>>>> #define VTD_ECAP_SMTS (1ULL << 43)
>>>>>> #define VTD_ECAP_SLTS (1ULL << 46)
>>>>>> +#define VTD_ECAP_FLTS (1ULL << 47)
>>>>>>
>>>>>> /* CAP_REG */
>>>>>> /* (offset >> 4) << 24 */
>>>>>> @@ -211,6 +212,7 @@
>>>>>> #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
>>>>>> #define VTD_CAP_DRAIN_WRITE (1ULL << 54)
>>>>>> #define VTD_CAP_DRAIN_READ (1ULL << 55)
>>>>>> +#define VTD_CAP_FS1GP (1ULL << 56)
>>>>>> #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
>>>>>> #define VTD_CAP_CM (1ULL << 7)
>>>>>> #define VTD_PASID_ID_SHIFT 20
>>>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>>>> index b921793c3a..a7a81aebee 100644
>>>>>> --- a/hw/i386/intel_iommu.c
>>>>>> +++ b/hw/i386/intel_iommu.c
>>>>>> @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
>>>>>> }
>>>>>>
>>>>>> /* Return true if check passed, otherwise false */
>>>>>> -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
>>>>>> - VTDPASIDEntry *pe)
>>>>>> +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>>>>> {
>>>>>> switch (VTD_PE_GET_TYPE(pe)) {
>>>>>> - case VTD_SM_PASID_ENTRY_SLT:
>>>>>> - return true;
>>>>>> - case VTD_SM_PASID_ENTRY_PT:
>>>>>> - return x86_iommu->pt_supported;
>>>>>> case VTD_SM_PASID_ENTRY_FLT:
>>>>>> + return !!(s->ecap & VTD_ECAP_FLTS);
>>>>>> + case VTD_SM_PASID_ENTRY_SLT:
>>>>>> + return !!(s->ecap & VTD_ECAP_SLTS);
>>>>>> case VTD_SM_PASID_ENTRY_NESTED:
>>>>>> + /* Not support NESTED page table type yet */
>>>>>> + return false;
>>>>>> + case VTD_SM_PASID_ENTRY_PT:
>>>>>> + return !!(s->ecap & VTD_ECAP_PT);
>>>>>> default:
>>>>>> /* Unknown type */
>>>>>> return false;
>>>>>> @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>>>> uint8_t pgtt;
>>>>>> uint32_t index;
>>>>>> dma_addr_t entry_size;
>>>>>> - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>>>>>
>>>>>> index = VTD_PASID_TABLE_INDEX(pasid);
>>>>>> entry_size = VTD_PASID_ENTRY_SIZE;
>>>>>> @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>>>> }
>>>>>>
>>>>>> /* Do translation type check */
>>>>>> - if (!vtd_pe_type_check(x86_iommu, pe)) {
>>>>>> + if (!vtd_pe_type_check(s, pe)) {
>>>>>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>>>> }
>>>>>>
>>>>>> @@ -3827,6 +3828,7 @@ static Property vtd_properties[] = {
>>>>>> VTD_HOST_ADDRESS_WIDTH),
>>>>>> DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
>>>>>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
>>>>>> + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, scalable_modern, FALSE),
>>>>>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
>>>>>> DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>>>>>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>>>>>> @@ -4558,7 +4560,10 @@ static void vtd_cap_init(IntelIOMMUState *s)
>>>>>> }
>>>>>>
>>>>>> /* TODO: read cap/ecap from host to decide which cap to be exposed. */
>>>>>> - if (s->scalable_mode) {
>>>>>> + if (s->scalable_modern) {
>>>>>> + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
>>>>>> + s->cap |= VTD_CAP_FS1GP;
>>>>>> + } else if (s->scalable_mode) {
>>>>>> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
>>>>>> }
>>>>>>
>>>>>> @@ -4737,6 +4742,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>>>>> }
>>>>>> }
>>>>>>
>>>>>> + if (!s->scalable_mode && s->scalable_modern) {
>>>>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>>>>> This seems to be wired, should we say "scalable mode is needed for
>>>>> scalable modern mode"?
>>>> Hi Jason,
>>>>
>>>> We agreed to use the following sentence: "x-flts is only available in
>>>> scalable mode"
>>>>
>>>> Does it look goot to you?
>>> Better but if we add more features to the scalable modern, we need to
>>> change the error message here.
>> Hi Jason
>>
>> Maybe the weirdness comes from the fact that x-flts on the command line
>> is mapped to scalable_modern in the code?
> Yes, actually the code checks if scalable mode is enabled if scalable
> modern is enabled. But this is inconsistent with the error message
> (though x-flts was implied there probably).
Would you rename s->scalable_modern to s->flts?
>
> Thanks
>
>
>> Thanks
>> >cmd
>>
>>> Thanks
>>>
>>>> Thanks
>>>> cmd
>>>>
>>>>>> + return false;
>>>>>> + }
>>>>>> +
>>>>>> if (!s->scalable_modern && s->aw_bits != VTD_HOST_AW_39BIT &&
>>>>>> s->aw_bits != VTD_HOST_AW_48BIT) {
>>>>>> error_setg(errp, "%s mode: supported values for aw-bits are: %d, %d",
>>>>>> --
>>>>>> 2.34.1
>>>>>>
>>>>> Thanks
>>>>>
^ permalink raw reply [flat|nested] 46+ messages in thread
* RE: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-09 6:42 ` CLEMENT MATHIEU--DRIF
@ 2024-12-11 2:22 ` Duan, Zhenzhong
2024-12-11 3:03 ` Jason Wang
0 siblings, 1 reply; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-12-11 2:22 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, Jason Wang
Cc: qemu-devel@nongnu.org, alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, mst@redhat.com, peterx@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
Hi Jason, Clement,
Sorry for late reply, just back from vacation.
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
>scalable modern mode
>
>
>
>
>On 09/12/2024 07:24, Jason Wang wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> On Mon, Dec 9, 2024 at 2:15 PM CLEMENT MATHIEU--DRIF
>> <clement.mathieu--drif@eviden.com> wrote:
>>>
>>>
>>> On 09/12/2024 04:13, Jason Wang wrote:
>>>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>>>
>>>>
>>>> On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
>>>> <clement.mathieu--drif@eviden.com> wrote:
>>>>>
>>>>>
>>>>> On 04/12/2024 04:34, Jason Wang wrote:
>>>>>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>>>>>
>>>>>>
>>>>>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan
><zhenzhong.duan@intel.com> wrote:
>>>>>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>>>>>>> related to scalable mode translation, thus there are multiple
>combinations.
>>>>>>>
>>>>>>> This vIOMMU implementation wants to simplify it with a new property "x-
>flts".
>>>>>>> When enabled in scalable mode, first stage translation also known as
>scalable
>>>>>>> modern mode is supported. When enabled in legacy mode, throw out
>error.
>>>>>>>
>>>>>>> With scalable modern mode exposed to user, also accurate the pasid
>entry
>>>>>>> check in vtd_pe_type_check().
>>>>>>>
>>>>>>> Suggested-by: Jason Wang <jasowang@redhat.com>
>>>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>>> ---
>>>>>>> hw/i386/intel_iommu_internal.h | 2 ++
>>>>>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>>>>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>>>>>>> index 2c977aa7da..e8b211e8b0 100644
>>>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>>>> +++ b/hw/i386/intel_iommu_internal.h
...
>>>>>>> @@ -4737,6 +4742,11 @@ static bool
>vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>>>>>> }
>>>>>>> }
>>>>>>>
>>>>>>> + if (!s->scalable_mode && s->scalable_modern) {
>>>>>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>>>>>> This seems to be weird, should we say "scalable mode is needed for
>>>>>> scalable modern mode"?
>>>>> Hi Jason,
>>>>>
>>>>> We agreed to use the following sentence: "x-flts is only available in
>>>>> scalable mode"
>>>>>
>>>>> Does it look good to you?
>>>> Better but if we add more features to the scalable modern, we need to
>>>> change the error message here.
>>> Hi Jason
>>>
>>> Maybe the weirdness comes from the fact that x-flts on the command line
>>> is mapped to scalable_modern in the code?
>> Yes, actually the code checks if scalable mode is enabled if scalable
>> modern is enabled. But this is inconsistent with the error message
>> (though x-flts was implied there probably).
>
>Would you rename s->scalable_modern to s->flts?
Starting from v4, we replace x-scalable-mode=modern with flts=on on QEMU cmdline.
Scalable modern mode is an alias of stage-1 page table, so I reuse s->scalable_modern
in code, I'm fine to rename to s->flts if that's preferred. In that case, maybe we should
also drop the concept of 'scalable modern mode' totally?
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-11 2:22 ` Duan, Zhenzhong
@ 2024-12-11 3:03 ` Jason Wang
2024-12-11 6:08 ` CLEMENT MATHIEU--DRIF
0 siblings, 1 reply; 46+ messages in thread
From: Jason Wang @ 2024-12-11 3:03 UTC (permalink / raw)
To: Duan, Zhenzhong
Cc: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org,
alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, joao.m.martins@oracle.com, Tian, Kevin,
Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum, Paolo Bonzini,
Richard Henderson, Eduardo Habkost
On Wed, Dec 11, 2024 at 10:50 AM Duan, Zhenzhong
<zhenzhong.duan@intel.com> wrote:
>
> Hi Jason, Clement,
>
> Sorry for late reply, just back from vacation.
>
> >-----Original Message-----
> >From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
> >Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
> >scalable modern mode
> >
> >
> >
> >
> >On 09/12/2024 07:24, Jason Wang wrote:
> >> Caution: External email. Do not open attachments or click links, unless this
> >email comes from a known sender and you know the content is safe.
> >>
> >>
> >> On Mon, Dec 9, 2024 at 2:15 PM CLEMENT MATHIEU--DRIF
> >> <clement.mathieu--drif@eviden.com> wrote:
> >>>
> >>>
> >>> On 09/12/2024 04:13, Jason Wang wrote:
> >>>> Caution: External email. Do not open attachments or click links, unless this
> >email comes from a known sender and you know the content is safe.
> >>>>
> >>>>
> >>>> On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
> >>>> <clement.mathieu--drif@eviden.com> wrote:
> >>>>>
> >>>>>
> >>>>> On 04/12/2024 04:34, Jason Wang wrote:
> >>>>>> Caution: External email. Do not open attachments or click links, unless this
> >email comes from a known sender and you know the content is safe.
> >>>>>>
> >>>>>>
> >>>>>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan
> ><zhenzhong.duan@intel.com> wrote:
> >>>>>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
> >>>>>>> related to scalable mode translation, thus there are multiple
> >combinations.
> >>>>>>>
> >>>>>>> This vIOMMU implementation wants to simplify it with a new property "x-
> >flts".
> >>>>>>> When enabled in scalable mode, first stage translation also known as
> >scalable
> >>>>>>> modern mode is supported. When enabled in legacy mode, throw out
> >error.
> >>>>>>>
> >>>>>>> With scalable modern mode exposed to user, also accurate the pasid
> >entry
> >>>>>>> check in vtd_pe_type_check().
> >>>>>>>
> >>>>>>> Suggested-by: Jason Wang <jasowang@redhat.com>
> >>>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> >>>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> >>>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >>>>>>> ---
> >>>>>>> hw/i386/intel_iommu_internal.h | 2 ++
> >>>>>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
> >>>>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/hw/i386/intel_iommu_internal.h
> >b/hw/i386/intel_iommu_internal.h
> >>>>>>> index 2c977aa7da..e8b211e8b0 100644
> >>>>>>> --- a/hw/i386/intel_iommu_internal.h
> >>>>>>> +++ b/hw/i386/intel_iommu_internal.h
> ...
> >>>>>>> @@ -4737,6 +4742,11 @@ static bool
> >vtd_decide_config(IntelIOMMUState *s, Error **errp)
> >>>>>>> }
> >>>>>>> }
> >>>>>>>
> >>>>>>> + if (!s->scalable_mode && s->scalable_modern) {
> >>>>>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
> >>>>>> This seems to be weird, should we say "scalable mode is needed for
> >>>>>> scalable modern mode"?
> >>>>> Hi Jason,
> >>>>>
> >>>>> We agreed to use the following sentence: "x-flts is only available in
> >>>>> scalable mode"
> >>>>>
> >>>>> Does it look good to you?
> >>>> Better but if we add more features to the scalable modern, we need to
> >>>> change the error message here.
> >>> Hi Jason
> >>>
> >>> Maybe the weirdness comes from the fact that x-flts on the command line
> >>> is mapped to scalable_modern in the code?
> >> Yes, actually the code checks if scalable mode is enabled if scalable
> >> modern is enabled. But this is inconsistent with the error message
> >> (though x-flts was implied there probably).
> >
> >Would you rename s->scalable_modern to s->flts?
>
> Starting from v4, we replace x-scalable-mode=modern with flts=on on QEMU cmdline.
> Scalable modern mode is an alias of stage-1 page table, so I reuse s->scalable_modern
> in code, I'm fine to rename to s->flts if that's preferred. In that case, maybe we should
> also drop the concept of 'scalable modern mode' totally?
I think so, it helps to reduce the confusion.
Thanks
>
> Thanks
> Zhenzhong
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode
2024-12-11 3:03 ` Jason Wang
@ 2024-12-11 6:08 ` CLEMENT MATHIEU--DRIF
0 siblings, 0 replies; 46+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2024-12-11 6:08 UTC (permalink / raw)
To: Jason Wang, Duan, Zhenzhong
Cc: qemu-devel@nongnu.org, alex.williamson@redhat.com, clg@redhat.com,
eric.auger@redhat.com, mst@redhat.com, peterx@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
Tian, Kevin, Liu, Yi L, Peng, Chao P, Yi Sun, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
On 11/12/2024 04:03, Jason Wang wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> On Wed, Dec 11, 2024 at 10:50 AM Duan, Zhenzhong
> <zhenzhong.duan@intel.com> wrote:
>> Hi Jason, Clement,
>>
>> Sorry for late reply, just back from vacation.
>>
>>> -----Original Message-----
>>> From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>>> Subject: Re: [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for
>>> scalable modern mode
>>>
>>>
>>>
>>>
>>> On 09/12/2024 07:24, Jason Wang wrote:
>>>> Caution: External email. Do not open attachments or click links, unless this
>>> email comes from a known sender and you know the content is safe.
>>>>
>>>> On Mon, Dec 9, 2024 at 2:15 PM CLEMENT MATHIEU--DRIF
>>>> <clement.mathieu--drif@eviden.com> wrote:
>>>>>
>>>>> On 09/12/2024 04:13, Jason Wang wrote:
>>>>>> Caution: External email. Do not open attachments or click links, unless this
>>> email comes from a known sender and you know the content is safe.
>>>>>>
>>>>>> On Wed, Dec 4, 2024 at 2:14 PM CLEMENT MATHIEU--DRIF
>>>>>> <clement.mathieu--drif@eviden.com> wrote:
>>>>>>>
>>>>>>> On 04/12/2024 04:34, Jason Wang wrote:
>>>>>>>> Caution: External email. Do not open attachments or click links, unless this
>>> email comes from a known sender and you know the content is safe.
>>>>>>>>
>>>>>>>> On Mon, Nov 11, 2024 at 4:39 PM Zhenzhong Duan
>>> <zhenzhong.duan@intel.com> wrote:
>>>>>>>>> Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities
>>>>>>>>> related to scalable mode translation, thus there are multiple
>>> combinations.
>>>>>>>>> This vIOMMU implementation wants to simplify it with a new property "x-
>>> flts".
>>>>>>>>> When enabled in scalable mode, first stage translation also known as
>>> scalable
>>>>>>>>> modern mode is supported. When enabled in legacy mode, throw out
>>> error.
>>>>>>>>> With scalable modern mode exposed to user, also accurate the pasid
>>> entry
>>>>>>>>> check in vtd_pe_type_check().
>>>>>>>>>
>>>>>>>>> Suggested-by: Jason Wang <jasowang@redhat.com>
>>>>>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>>>>> ---
>>>>>>>>> hw/i386/intel_iommu_internal.h | 2 ++
>>>>>>>>> hw/i386/intel_iommu.c | 28 +++++++++++++++++++---------
>>>>>>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>>>>>>> index 2c977aa7da..e8b211e8b0 100644
>>>>>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>>>>>> +++ b/hw/i386/intel_iommu_internal.h
>> ...
>>>>>>>>> @@ -4737,6 +4742,11 @@ static bool
>>> vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>>>>>>>> }
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>> + if (!s->scalable_mode && s->scalable_modern) {
>>>>>>>>> + error_setg(errp, "Legacy mode: not support x-flts=on");
>>>>>>>> This seems to be weird, should we say "scalable mode is needed for
>>>>>>>> scalable modern mode"?
>>>>>>> Hi Jason,
>>>>>>>
>>>>>>> We agreed to use the following sentence: "x-flts is only available in
>>>>>>> scalable mode"
>>>>>>>
>>>>>>> Does it look good to you?
>>>>>> Better but if we add more features to the scalable modern, we need to
>>>>>> change the error message here.
>>>>> Hi Jason
>>>>>
>>>>> Maybe the weirdness comes from the fact that x-flts on the command line
>>>>> is mapped to scalable_modern in the code?
>>>> Yes, actually the code checks if scalable mode is enabled if scalable
>>>> modern is enabled. But this is inconsistent with the error message
>>>> (though x-flts was implied there probably).
>>> Would you rename s->scalable_modern to s->flts?
>> Starting from v4, we replace x-scalable-mode=modern with flts=on on QEMU cmdline.
>> Scalable modern mode is an alias of stage-1 page table, so I reuse s->scalable_modern
>> in code, I'm fine to rename to s->flts if that's preferred. In that case, maybe we should
>> also drop the concept of 'scalable modern mode' totally?
> I think so, it helps to reduce the confusion.
>
> Thanks
Yep, at this stage dropping mentions of "modern" is clearer.
Thanks
>cmd
>
>> Thanks
>> Zhenzhong
^ permalink raw reply [flat|nested] 46+ messages in thread
* [PATCH v5 19/20] intel_iommu: Introduce a property to control FS1GP cap bit setting
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (17 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 18/20] intel_iommu: Introduce a property x-flts for scalable modern mode Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-11-11 8:34 ` [PATCH v5 20/20] tests/qtest: Add intel-iommu test Zhenzhong Duan
2024-12-03 9:00 ` [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Duan, Zhenzhong
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Marcel Apfelbaum,
Paolo Bonzini, Richard Henderson, Eduardo Habkost
This gives user flexibility to turn off FS1GP for debug purpose.
It is also useful for future nesting feature. When host IOMMU doesn't
support FS1GP but vIOMMU does, nested page table on host side works
after turning FS1GP off in vIOMMU.
This property has no effect when vIOMMU isn't in scalable modern
mode.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
include/hw/i386/intel_iommu.h | 1 +
hw/i386/intel_iommu.c | 5 ++++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 09ce707930..fa787d5b0d 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -307,6 +307,7 @@ struct IntelIOMMUState {
bool dma_drain; /* Whether DMA r/w draining enabled */
bool dma_translation; /* Whether DMA translation supported */
bool pasid; /* Whether to support PASID */
+ bool fs1gp; /* First Stage 1-GByte Page Support */
/* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
bool stale_tm;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a7a81aebee..043426032c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3834,6 +3834,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
+ DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -4562,7 +4563,9 @@ static void vtd_cap_init(IntelIOMMUState *s)
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
if (s->scalable_modern) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
- s->cap |= VTD_CAP_FS1GP;
+ if (s->fs1gp) {
+ s->cap |= VTD_CAP_FS1GP;
+ }
} else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [PATCH v5 20/20] tests/qtest: Add intel-iommu test
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (18 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 19/20] intel_iommu: Introduce a property to control FS1GP cap bit setting Zhenzhong Duan
@ 2024-11-11 8:34 ` Zhenzhong Duan
2024-12-03 9:00 ` [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Duan, Zhenzhong
20 siblings, 0 replies; 46+ messages in thread
From: Zhenzhong Duan @ 2024-11-11 8:34 UTC (permalink / raw)
To: qemu-devel
Cc: alex.williamson, clg, eric.auger, mst, peterx, jasowang, jgg,
nicolinc, joao.m.martins, clement.mathieu--drif, kevin.tian,
yi.l.liu, chao.p.peng, Zhenzhong Duan, Thomas Huth,
Marcel Apfelbaum, Fabiano Rosas, Laurent Vivier, Paolo Bonzini
Add the framework to test the intel-iommu device.
Currently it only tests the correctness of the cap/ecap bits in scalable
modern mode, and the consistency of those bits before and after a
system reset.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Acked-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
MAINTAINERS | 1 +
include/hw/i386/intel_iommu.h | 1 +
tests/qtest/intel-iommu-test.c | 65 ++++++++++++++++++++++++++++++++++
tests/qtest/meson.build | 1 +
4 files changed, 68 insertions(+)
create mode 100644 tests/qtest/intel-iommu-test.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 095420f8b0..de3da859cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3667,6 +3667,7 @@ S: Supported
F: hw/i386/intel_iommu.c
F: hw/i386/intel_iommu_internal.h
F: include/hw/i386/intel_iommu.h
+F: tests/qtest/intel-iommu-test.c
AMD-Vi Emulation
S: Orphan
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index fa787d5b0d..1a4a53053f 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -47,6 +47,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE)
#define VTD_HOST_AW_48BIT 48
#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT
#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1)
+#define VTD_MGAW_FROM_CAP(cap) ((cap >> 16) & 0x3fULL)
#define DMAR_REPORT_F_INTR (1)
diff --git a/tests/qtest/intel-iommu-test.c b/tests/qtest/intel-iommu-test.c
new file mode 100644
index 0000000000..82f5b6efcf
--- /dev/null
+++ b/tests/qtest/intel-iommu-test.c
@@ -0,0 +1,65 @@
+/*
+ * QTest testcase for intel-iommu
+ *
+ * Copyright (c) 2024 Intel, Inc.
+ *
+ * Author: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "hw/i386/intel_iommu_internal.h"
+
+#define CAP_MODERN_FIXED1 (VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | \
+ VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS)
+#define ECAP_MODERN_FIXED1 (VTD_ECAP_QI | VTD_ECAP_IR | VTD_ECAP_IRO | \
+ VTD_ECAP_MHMV | VTD_ECAP_SMTS | VTD_ECAP_FLTS)
+
+static inline uint64_t vtd_reg_readq(QTestState *s, uint64_t offset)
+{
+ return qtest_readq(s, Q35_HOST_BRIDGE_IOMMU_ADDR + offset);
+}
+
+static void test_intel_iommu_modern(void)
+{
+ uint8_t init_csr[DMAR_REG_SIZE]; /* register values */
+ uint8_t post_reset_csr[DMAR_REG_SIZE]; /* register values */
+ uint64_t cap, ecap, tmp;
+ QTestState *s;
+
+ s = qtest_init("-M q35 -device intel-iommu,x-scalable-mode=on,x-flts=on");
+
+ cap = vtd_reg_readq(s, DMAR_CAP_REG);
+ g_assert((cap & CAP_MODERN_FIXED1) == CAP_MODERN_FIXED1);
+
+ tmp = cap & VTD_CAP_SAGAW_MASK;
+ g_assert(tmp == (VTD_CAP_SAGAW_39bit | VTD_CAP_SAGAW_48bit));
+
+ tmp = VTD_MGAW_FROM_CAP(cap);
+ g_assert(tmp == VTD_HOST_AW_48BIT - 1);
+
+ ecap = vtd_reg_readq(s, DMAR_ECAP_REG);
+ g_assert((ecap & ECAP_MODERN_FIXED1) == ECAP_MODERN_FIXED1);
+
+ qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, init_csr, DMAR_REG_SIZE);
+
+ qobject_unref(qtest_qmp(s, "{ 'execute': 'system_reset' }"));
+ qtest_qmp_eventwait(s, "RESET");
+
+ qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, post_reset_csr, DMAR_REG_SIZE);
+ /* Ensure registers are consistent after hard reset */
+ g_assert(!memcmp(init_csr, post_reset_csr, DMAR_REG_SIZE));
+
+ qtest_quit(s);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ qtest_add_func("/q35/intel-iommu/modern", test_intel_iommu_modern);
+
+ return g_test_run();
+}
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index aa93e98418..83d5474f53 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -93,6 +93,7 @@ qtests_i386 = \
(config_all_devices.has_key('CONFIG_SB16') ? ['fuzz-sb16-test'] : []) + \
(config_all_devices.has_key('CONFIG_SDHCI_PCI') ? ['fuzz-sdcard-test'] : []) + \
(config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \
+ (config_all_devices.has_key('CONFIG_VTD') ? ['intel-iommu-test'] : []) + \
(host_os != 'windows' and \
config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) + \
(config_all_devices.has_key('CONFIG_PCIE_PORT') and \
--
2.34.1
^ permalink raw reply related [flat|nested] 46+ messages in thread
* RE: [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device
2024-11-11 8:34 [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated device Zhenzhong Duan
` (19 preceding siblings ...)
2024-11-11 8:34 ` [PATCH v5 20/20] tests/qtest: Add intel-iommu test Zhenzhong Duan
@ 2024-12-03 9:00 ` Duan, Zhenzhong
20 siblings, 0 replies; 46+ messages in thread
From: Duan, Zhenzhong @ 2024-12-03 9:00 UTC (permalink / raw)
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, peterx@redhat.com, jasowang@redhat.com,
jgg@nvidia.com, nicolinc@nvidia.com, joao.m.martins@oracle.com,
clement.mathieu--drif@eviden.com, Tian, Kevin, Liu, Yi L,
Peng, Chao P
Hi all,
Kindly ping😊, any more comments on this version?
Thanks
Zhenzhong
>-----Original Message-----
>From: Duan, Zhenzhong <zhenzhong.duan@intel.com>
>Subject: [PATCH v5 00/20] intel_iommu: Enable stage-1 translation for emulated
>device
>
>Hi,
>
>Per Jason Wang's suggestion, iommufd nesting series[1] is split into
>"Enable stage-1 translation for emulated device" series and
>"Enable stage-1 translation for passthrough device" series.
>
>This series enables stage-1 translation support for emulated device
>in intel iommu which we called "modern" mode.
>
>PATCH1-5: Some preparing work before support stage-1 translation
>PATCH6-9: Implement stage-1 translation for emulated device
>PATCH10-14:Emulate iotlb invalidation of stage-1 mapping
>PATCH15-17:Set default aw_bits to 48 in all modes, update DMAR table
>PATCH18-19:Expose scalable modern mode "x-flts" and "fs1gp" to cmdline
>PATCH20: Add qtest
>
>Note in spec revision 3.4, it renames "First-level" to "First-stage",
>"Second-level" to "Second-stage". But the scalable mode was added
>before that change. So we keep the old flavor, using First-level/fl/Second-level/sl
>in code, but switch to stage-1/stage-2 in the commit log.
>But keep in mind First-level/fl/stage-1 all have same meaning,
>same for Second-level/sl/stage-2.
>
>Test done:
>- two VFIO devices hotplug/unplug in legacy and scalable modern/legacy mode
>- vhost with caching-mode=off
>- windows 2019 VM bootup
>
>Qemu code can be found at [2]
>The whole nesting series can be found at [3]
>
>[1] https://lists.gnu.org/archive/html/qemu-devel/2024-01/msg02740.html
>[2]
>https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_stage1_emu_v5
>[3] https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_nesting_rfcv2
>
>Thanks
>Zhenzhong
>
>Changelog:
>v5:
>- add new patch8 to check if translation result fall in ir range (Liuyi)
>- remove unused parameter ih from vtd_piotlb_page_invalidate() (Liuyi)
>- define target as pointer in vtd_find_as_by_sid_and_pasid() (Liuyi)
>- s/x-fls/x-flts (Liuyi)
>- set default aw_bits to 48 for all modes (jason)
>- fix return value of vtd_iova_to_flpte()
>- merge piotlb inv notify to vtd_iotlb_page_invalidate_notify(),
> no functional change
>
>v4:
>- s/Scalable legacy/Scalable in logging (Clement)
>- test the mode first to make the intention clearer (Clement)
>- s/x-cap-fs1gp/fs1gp and s/VTD_FL_RW_MASK/VTD_FL_RW (Jason)
>- introduce x-fls instead of updating x-scalable-mode (Jason)
>- Refine comment log in patch4 (jason)
>- s/tansltion/translation/ and s/VTD_SPTE_RSVD_LEN/VTD_FPTE_RSVD_LEN/
>(Liuyi)
>- update the order and naming of VTD_FPTE_PAGE_* (Liuyi)
>
>v3:
>- drop unnecessary !(s->ecap & VTD_ECAP_SMTS) (Clement)
>- simplify calculation of return value for vtd_iova_fl_check_canonical() (Liuyi)
>- make A/D bit setting atomic (Liuyi)
>- refine error msg (Clement, Liuyi)
>
>v2:
>- check ecap/cap bits instead of s->scalable_modern in vtd_pe_type_check()
>(Clement)
>- declare VTD_ECAP_FLTS/FS1GP after the feature is implemented (Clement)
>- define VTD_INV_DESC_PIOTLB_G (Clement)
>- make error msg consistent in vtd_process_piotlb_desc() (Clement)
>- refine commit log in patch16 (Clement)
>- add VTD_ECAP_IR to ECAP_MODERN_FIXED1 (Clement)
>- add a knob x-cap-fs1gp to control stage-1 1G paging capability
>- collect Clement's R-B
>
>v1:
>- define VTD_HOST_AW_AUTO (Clement)
>- passing pgtt as a parameter to vtd_update_iotlb (Clement)
>- prefix sl_/fl_ to second/first level specific functions (Clement)
>- pick reserved bit check from Clement, add his Co-developed-by
>- Update test without using libqtest-single.h (Thomas)
>
>rfcv2:
>- split from nesting series (Jason)
>- merged some commits from Clement
>- add qtest (jason)
>
>
>Clément Mathieu--Drif (4):
> intel_iommu: Check if the input address is canonical
> intel_iommu: Set accessed and dirty bits during stage-1 translation
> intel_iommu: Add an internal API to find an address space with PASID
> intel_iommu: Add support for PASID-based device IOTLB invalidation
>
>Yi Liu (2):
> intel_iommu: Rename slpte to pte
> intel_iommu: Implement stage-1 translation
>
>Yu Zhang (1):
> intel_iommu: Use the latest fault reasons defined by spec
>
>Zhenzhong Duan (13):
> intel_iommu: Make pasid entry type check accurate
> intel_iommu: Add a placeholder variable for scalable modern mode
> intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb
> invalidation
> intel_iommu: Check stage-1 translation result with interrupt range
> intel_iommu: Flush stage-1 cache in iotlb invalidation
> intel_iommu: Process PASID-based iotlb invalidation
> intel_iommu: piotlb invalidation should notify unmap
> tests/acpi: q35: allow DMAR acpi table changes
> intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2
> tests/acpi: q35: Update host address width in DMAR
> intel_iommu: Introduce a property x-flts for scalable modern mode
> intel_iommu: Introduce a property to control FS1GP cap bit setting
> tests/qtest: Add intel-iommu test
>
> MAINTAINERS | 1 +
> hw/i386/intel_iommu_internal.h | 101 ++++-
> include/hw/i386/intel_iommu.h | 8 +-
> hw/i386/intel_iommu.c | 732 ++++++++++++++++++++++++------
> hw/i386/pc.c | 1 +
> tests/qtest/intel-iommu-test.c | 65 +++
> tests/data/acpi/x86/q35/DMAR.dmar | Bin 120 -> 120 bytes
> tests/qtest/meson.build | 1 +
> 8 files changed, 749 insertions(+), 160 deletions(-)
> create mode 100644 tests/qtest/intel-iommu-test.c
>
>--
>2.34.1
^ permalink raw reply [flat|nested] 46+ messages in thread