* [Qemu-devel] [PATCH v2 2/4] intel_iommu: let iotlb size tunable
From: Peter Xu @ 2017-07-12 7:36 UTC
To: qemu-devel; +Cc: Michael S. Tsirkin, peterx, Jason Wang
The IOTLB size used to be hard-coded to 1024 entries. Make it a tunable instead. We can
also turn the IOTLB off completely by specifying a size of zero.
The tunable is named "x-iotlb-size" since it is not really meant to be used by
end users yet; for now it is mostly for debugging purposes.
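For example, the property can be set when instantiating the device on the command
line (illustrative invocation only; the machine type and elided options are just
placeholders, not part of this patch):

    # run with the VT-d IOTLB cache disabled entirely
    qemu-system-x86_64 -machine q35 -device intel-iommu,x-iotlb-size=0 ...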
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/i386/intel_iommu.c | 11 ++++++++++-
hw/i386/intel_iommu_internal.h | 1 -
include/hw/i386/intel_iommu.h | 1 +
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 392da45..9c36866 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -222,6 +222,10 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint64_t key;
int level;
+ if (s->iotlb_size == 0) {
+ return NULL;
+ }
+
for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
source_id, level);
@@ -244,8 +248,12 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint64_t *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
+ if (s->iotlb_size == 0) {
+ return;
+ }
+
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
- if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
+ if (g_hash_table_size(s->iotlb) >= s->iotlb_size) {
trace_vtd_iotlb_reset("iotlb exceeds size limit");
vtd_reset_iotlb(s);
}
@@ -2397,6 +2405,7 @@ static Property vtd_properties[] = {
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
+ DEFINE_PROP_UINT16("x-iotlb-size", IntelIOMMUState, iotlb_size, 1024),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d1d6290..dc0257c 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -116,7 +116,6 @@
/* The shift of source_id in the key of IOTLB hash table */
#define VTD_IOTLB_SID_SHIFT 36
#define VTD_IOTLB_LVL_SHIFT 52
-#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
#define VTD_TLB_GLOBAL_FLUSH (1ULL << 60) /* Global invalidation */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3e51876..a57f419 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -288,6 +288,7 @@ struct IntelIOMMUState {
uint32_t context_cache_gen; /* Should be in [1,MAX] */
GHashTable *iotlb; /* IOTLB */
+ uint16_t iotlb_size; /* IOTLB max cache entries */
MemoryRegionIOMMUOps iommu_ops;
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
--
2.7.4
* [Qemu-devel] [PATCH v2 3/4] intel_iommu: use access_flags for iotlb
From: Peter Xu @ 2017-07-12 7:36 UTC
To: qemu-devel; +Cc: Michael S. Tsirkin, peterx, Jason Wang
The read and write permissions were cached in each IOTLB entry as two separate
booleans. Let's merge them into a single access_flags field.
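For reference, IOMMU_ACCESS_FLAG() from include/exec/memory.h packs the two
booleans into one IOMMUAccessFlags value roughly as follows (paraphrased sketch;
see the header for the exact definition):

    /* IOMMU_NONE = 0, IOMMU_RO = 1, IOMMU_WO = 2, IOMMU_RW = 3 */
    #define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))

so a read-only mapping yields IOMMU_RO and a read-write one IOMMU_RW, which is
exactly what entry->perm expects.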
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/i386/intel_iommu.c | 15 +++++++--------
include/hw/i386/intel_iommu.h | 3 +--
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9c36866..e17340c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -241,8 +241,7 @@ out:
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
- bool read_flags, bool write_flags,
- uint32_t level)
+ uint8_t access_flags, uint32_t level)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
uint64_t *key = g_malloc(sizeof(*key));
@@ -261,8 +260,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->gfn = gfn;
entry->domain_id = domain_id;
entry->slpte = slpte;
- entry->read_flags = read_flags;
- entry->write_flags = write_flags;
+ entry->access_flags = access_flags;
entry->mask = vtd_slpt_level_page_mask(level);
*key = vtd_get_iotlb_key(gfn, source_id, level);
g_hash_table_replace(s->iotlb, key, entry);
@@ -1095,6 +1093,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
+ uint8_t access_flags;
VTDIOTLBEntry *iotlb_entry;
/*
@@ -1109,8 +1108,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
iotlb_entry->domain_id);
slpte = iotlb_entry->slpte;
- reads = iotlb_entry->read_flags;
- writes = iotlb_entry->write_flags;
+ access_flags = iotlb_entry->access_flags;
page_mask = iotlb_entry->mask;
goto out;
}
@@ -1180,13 +1178,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
page_mask = vtd_slpt_level_page_mask(level);
+ access_flags = IOMMU_ACCESS_FLAG(reads, writes);
vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
- reads, writes, level);
+ access_flags, level);
out:
entry->iova = addr & page_mask;
entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
entry->addr_mask = ~page_mask;
- entry->perm = IOMMU_ACCESS_FLAG(reads, writes);
+ entry->perm = access_flags;
return true;
error:
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index a57f419..1b51c9f 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -101,8 +101,7 @@ struct VTDIOTLBEntry {
uint16_t domain_id;
uint64_t slpte;
uint64_t mask;
- bool read_flags;
- bool write_flags;
+ uint8_t access_flags;
};
/* VT-d Source-ID Qualifier types */
--
2.7.4
* [Qemu-devel] [PATCH v2 4/4] intel_iommu: implement mru list for iotlb
From: Peter Xu @ 2017-07-12 7:36 UTC
To: qemu-devel; +Cc: Michael S. Tsirkin, peterx, Jason Wang
It is not wise to discard the entire IOTLB cache when it reaches its maximum
size, but that is what we do now. A slightly better (and still simple) approach
is to throw away only the least recently used entry.
This patch implements an MRU list for the VT-d IOTLB. The main logic is to
maintain a Most Recently Used list of the IOTLB entries. The hash table is still
used for lookup, but a new list field is added to each IOTLB entry, so every
active entry is both stored in the s->iotlb hash table and linked into the MRU
list at s->iotlb_head. The hash table gives fast lookup, while the MRU list
tells us which entries are still hot.
With the MRU list in place, all iterations over IOTLB entries are converted
from hash table traversals into list traversals.
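To illustrate the idea outside of the VT-d code, below is a minimal,
self-contained sketch of the same eviction pattern: a doubly linked list kept in
recency order, where a lookup hit moves the entry to the head and a full cache
evicts from the tail. It uses the BSD <sys/queue.h> TAILQ macros in place of
QEMU's QTAILQ and a linear scan in place of the hash table, purely to keep the
example short; names such as CacheEntry and CACHE_MAX are made up for the sketch.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    #define CACHE_MAX 4                     /* stand-in for s->iotlb_size */

    typedef struct CacheEntry {
        uint64_t key;                       /* stand-in for the IOTLB hash key */
        uint64_t val;
        TAILQ_ENTRY(CacheEntry) link;       /* MRU list linkage */
    } CacheEntry;

    TAILQ_HEAD(CacheHead, CacheEntry);
    static struct CacheHead cache_head = TAILQ_HEAD_INITIALIZER(cache_head);
    static unsigned cache_size;

    /* Lookup: on a hit, move the entry to the list head so it becomes MRU */
    static CacheEntry *cache_lookup(uint64_t key)
    {
        CacheEntry *e;

        TAILQ_FOREACH(e, &cache_head, link) {   /* the real code uses a hash table */
            if (e->key == key) {
                TAILQ_REMOVE(&cache_head, e, link);
                TAILQ_INSERT_HEAD(&cache_head, e, link);
                return e;
            }
        }
        return NULL;
    }

    /* Insert: once the cache is full, evict the tail (least recently used) */
    static void cache_insert(uint64_t key, uint64_t val)
    {
        CacheEntry *e;

        if (cache_size >= CACHE_MAX) {
            CacheEntry *last = TAILQ_LAST(&cache_head, CacheHead);
            TAILQ_REMOVE(&cache_head, last, link);
            free(last);
            cache_size--;
        }

        e = calloc(1, sizeof(*e));
        e->key = key;
        e->val = val;
        TAILQ_INSERT_HEAD(&cache_head, e, link);
        cache_size++;
    }

    int main(void)
    {
        for (uint64_t i = 0; i < 6; i++) {
            cache_insert(i, i * 0x1000);    /* keys 0 and 1 get evicted */
        }
        cache_lookup(3);                    /* touch 3: it becomes the MRU entry */
        printf("hit 3: %s\n", cache_lookup(3) ? "yes" : "no");   /* yes */
        printf("hit 0: %s\n", cache_lookup(0) ? "yes" : "no");   /* no, evicted */
        return 0;
    }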
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/i386/intel_iommu.c | 75 +++++++++++++++++++++++++-----------------
hw/i386/intel_iommu_internal.h | 8 -----
hw/i386/trace-events | 1 -
include/hw/i386/intel_iommu.h | 6 +++-
4 files changed, 50 insertions(+), 40 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e17340c..6320dea 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,6 +37,9 @@
#include "kvm_i386.h"
#include "trace.h"
+#define FOREACH_IOTLB_SAFE(entry, s, entry_n) \
+ QTAILQ_FOREACH_SAFE(entry, &(s)->iotlb_head, link, entry_n)
+
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -139,14 +142,6 @@ static guint vtd_uint64_hash(gconstpointer v)
return (guint)*(const uint64_t *)v;
}
-static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
- gpointer user_data)
-{
- VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
- uint16_t domain_id = *(uint16_t *)user_data;
- return entry->domain_id == domain_id;
-}
-
/* The shift of an addr for a certain level of paging structure */
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
{
@@ -159,18 +154,6 @@ static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
}
-static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
- gpointer user_data)
-{
- VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
- VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
- uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
- uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
- return (entry->domain_id == info->domain_id) &&
- (((entry->gfn & info->mask) == gfn) ||
- (entry->gfn == gfn_tlb));
-}
-
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
* IntelIOMMUState to 1.
*/
@@ -201,6 +184,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
{
assert(s->iotlb);
g_hash_table_remove_all(s->iotlb);
+ QTAILQ_INIT(&s->iotlb_head);
}
static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
@@ -231,6 +215,9 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
source_id, level);
entry = g_hash_table_lookup(s->iotlb, &key);
if (entry) {
+ /* Move the entry to the head of MRU list */
+ QTAILQ_REMOVE(&s->iotlb_head, entry, link);
+ QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
goto out;
}
}
@@ -239,11 +226,23 @@ out:
return entry;
}
+static void vtd_iotlb_remove_entry(IntelIOMMUState *s, VTDIOTLBEntry *entry)
+{
+ uint64_t key = entry->key;
+
+ /*
+ * To remove an entry, we need to both remove it from the MRU
+ * list, and also from the hash table.
+ */
+ QTAILQ_REMOVE(&s->iotlb_head, entry, link);
+ g_hash_table_remove(s->iotlb, &key);
+}
+
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
uint8_t access_flags, uint32_t level)
{
- VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
+ VTDIOTLBEntry *entry = g_new0(VTDIOTLBEntry, 1), *last;
uint64_t *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
@@ -253,8 +252,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
if (g_hash_table_size(s->iotlb) >= s->iotlb_size) {
- trace_vtd_iotlb_reset("iotlb exceeds size limit");
- vtd_reset_iotlb(s);
+ /* Remove the Least Recently Used cache */
+ last = QTAILQ_LAST(&s->iotlb_head, VTDIOTLBEntryHead);
+ vtd_iotlb_remove_entry(s, last);
}
entry->gfn = gfn;
@@ -263,7 +263,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->access_flags = access_flags;
entry->mask = vtd_slpt_level_page_mask(level);
*key = vtd_get_iotlb_key(gfn, source_id, level);
+ entry->key = *key;
g_hash_table_replace(s->iotlb, key, entry);
+
+ /* Update MRU list */
+ QTAILQ_INSERT_HEAD(&s->iotlb_head, entry, link);
}
/* Given the reg addr of both the message data and address, generate an
@@ -1354,11 +1358,15 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
IntelIOMMUNotifierNode *node;
VTDContextEntry ce;
VTDAddressSpace *vtd_as;
+ VTDIOTLBEntry *entry, *entry_n;
trace_vtd_inv_desc_iotlb_domain(domain_id);
- g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
- &domain_id);
+ FOREACH_IOTLB_SAFE(entry, s, entry_n) {
+ if (entry->domain_id == domain_id) {
+ vtd_iotlb_remove_entry(s, entry);
+ }
+ }
QLIST_FOREACH(node, &s->notifiers_list, next) {
vtd_as = node->vtd_as;
@@ -1400,15 +1408,22 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
hwaddr addr, uint8_t am)
{
- VTDIOTLBPageInvInfo info;
+ VTDIOTLBEntry *entry, *entry_n;
+ uint64_t gfn, mask;
trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
assert(am <= VTD_MAMV);
- info.domain_id = domain_id;
- info.addr = addr;
- info.mask = ~((1 << am) - 1);
- g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+
+ mask = ~((1 << am) - 1);
+ gfn = (addr >> VTD_PAGE_SHIFT) & mask;
+
+ FOREACH_IOTLB_SAFE(entry, s, entry_n) {
+ if (entry->domain_id == domain_id && (entry->gfn & mask) == gfn) {
+ vtd_iotlb_remove_entry(s, entry);
+ }
+ }
+
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index dc0257c..84e68e1 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -372,14 +372,6 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
-/* Information about page-selective IOTLB invalidate */
-struct VTDIOTLBPageInvInfo {
- uint16_t domain_id;
- uint64_t addr;
- uint8_t mask;
-};
-typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
-
/* Pagesize of VTD paging structures, including root and context tables */
#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 42d8a7e..b552815 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -34,7 +34,6 @@ vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain)
vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
-vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
vtd_fault_disabled(void) "Fault processing disabled for context entry"
vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 1b51c9f..d2caab3 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -98,9 +98,11 @@ struct VTDBus {
struct VTDIOTLBEntry {
uint64_t gfn;
- uint16_t domain_id;
uint64_t slpte;
uint64_t mask;
+ uint64_t key;
+ QTAILQ_ENTRY(VTDIOTLBEntry) link;
+ uint16_t domain_id;
uint8_t access_flags;
};
@@ -288,6 +290,8 @@ struct IntelIOMMUState {
uint32_t context_cache_gen; /* Should be in [1,MAX] */
GHashTable *iotlb; /* IOTLB */
uint16_t iotlb_size; /* IOTLB max cache entries */
+ /* Head of IOTLB MRU list */
+ QTAILQ_HEAD(VTDIOTLBEntryHead, VTDIOTLBEntry) iotlb_head;
MemoryRegionIOMMUOps iommu_ops;
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
--
2.7.4