From: Jason Wang <jasowang@redhat.com>
To: Peter Xu <peterx@redhat.com>
Cc: yi.l.liu@intel.com, yi.y.sun@linux.intel.com,
qemu-devel@nongnu.org, mst@redhat.com
Subject: Re: [PATCH 2/3] intel-iommu: drop VTDBus
Date: Fri, 14 Jan 2022 10:32:16 +0800 [thread overview]
Message-ID: <b542cb8d-d1f4-6583-a89e-49dedafc77d4@redhat.com> (raw)
In-Reply-To: <Yd+mlM7oqqOkFtO4@xz-m1.local>
在 2022/1/13 下午12:12, Peter Xu 写道:
> On Wed, Jan 05, 2022 at 12:19:44PM +0800, Jason Wang wrote:
>> We introduce VTDBus structure as an intermediate step for searching
>> the address space. This works well with SID based matching/lookup. But
>> when we want to support SID plus PASID based address space lookup,
>> this intermediate steps turns out to be a burden. So the patch simply
>> drops the VTDBus structure and use the PCIBus and devfn as the key for
>> the g_hash_table(). This simplifies the codes and the future PASID
>> extension.
>>
>> This may case slight slower for the vtd_find_as_from_bus_num() callers
>> but since they are all called from the control path, we can afford it.
> The only one I found is vtd_process_device_iotlb_desc() that may got affected
> the most; the rest look mostly always traversing all the address space anyway
> so shouldn't hurt.
>
> I think dev-iotlb can be invalidated in IO path too when kernel device drivers
> are used? It shouldn't affect much when the VM has a few devices, but IIUC
> it'll further slow down the kernel drivers on vIOMMU. Maybe it's not a huge
> problem either.
Maybe we can keep maintaining a cache for some speedup for the searching
for NO_PASID.
>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>> hw/i386/intel_iommu.c | 183 +++++++++++++---------------------
>> include/hw/i386/intel_iommu.h | 10 +-
>> 2 files changed, 69 insertions(+), 124 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index f2c7a23712..58c682097b 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -61,6 +61,11 @@
>> } \
>> }
>>
>> +struct vtd_as_key {
>> + PCIBus *bus;
>> + uint8_t devfn;
>> +};
>> +
>> static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>>
>> @@ -190,12 +195,18 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
>> /* GHashTable functions */
>> static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
>> {
>> - return *((const uint64_t *)v1) == *((const uint64_t *)v2);
>> + const struct vtd_as_key *key1 = v1;
>> + const struct vtd_as_key *key2 = v2;
>> +
>> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> }
>>
>> static guint vtd_uint64_hash(gconstpointer v)
>> {
>> - return (guint)*(const uint64_t *)v;
>> + const struct vtd_as_key *key = v;
>> + guint value = (guint)(uintptr_t)key->bus;
>> +
>> + return (guint)(value << 8 | key->devfn);
> Note that value is a pointer to PCIBus*. Just want to check with you that it's
> intended to use this hash value (or maybe you wanted to use Source ID so it is
> bus number to use not the bus pointer)?
Right, SID should be used here.
>
>> }
>>
>> static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
>> @@ -236,22 +247,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
>> static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
>> {
>> VTDAddressSpace *vtd_as;
>> - VTDBus *vtd_bus;
>> - GHashTableIter bus_it;
>> - uint32_t devfn_it;
>> + GHashTableIter as_it;
>>
>> trace_vtd_context_cache_reset();
>>
>> - g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
>> + g_hash_table_iter_init(&as_it, s->vtd_as);
>>
>> - while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
>> - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
>> - vtd_as = vtd_bus->dev_as[devfn_it];
>> - if (!vtd_as) {
>> - continue;
>> - }
>> - vtd_as->context_cache_entry.context_cache_gen = 0;
>> - }
>> + while (g_hash_table_iter_next (&as_it, NULL, (void**)&vtd_as)) {
>> + vtd_as->context_cache_entry.context_cache_gen = 0;
>> }
>> s->context_cache_gen = 1;
>> }
>> @@ -986,32 +989,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
>> return slpte & rsvd_mask;
>> }
>>
>> -/* Find the VTD address space associated with a given bus number */
>> -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
>> -{
>> - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
>> - GHashTableIter iter;
>> -
>> - if (vtd_bus) {
>> - return vtd_bus;
>> - }
>> -
>> - /*
>> - * Iterate over the registered buses to find the one which
>> - * currently holds this bus number and update the bus_num
>> - * lookup table.
>> - */
>> - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
>> - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
>> - if (pci_bus_num(vtd_bus->bus) == bus_num) {
>> - s->vtd_as_by_bus_num[bus_num] = vtd_bus;
>> - return vtd_bus;
>> - }
>> - }
>> -
>> - return NULL;
>> -}
>> -
>> /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
>> * of the translation, can be used for deciding the size of large page.
>> */
>> @@ -1604,18 +1581,12 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
>>
>> static void vtd_switch_address_space_all(IntelIOMMUState *s)
>> {
>> + VTDAddressSpace *vtd_as;
>> GHashTableIter iter;
>> - VTDBus *vtd_bus;
>> - int i;
>> -
>> - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
>> - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
>> - for (i = 0; i < PCI_DEVFN_MAX; i++) {
>> - if (!vtd_bus->dev_as[i]) {
>> - continue;
>> - }
>> - vtd_switch_address_space(vtd_bus->dev_as[i]);
>> - }
>> +
>> + g_hash_table_iter_init(&iter, s->vtd_as);
>> + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
>> + vtd_switch_address_space(vtd_as);
>> }
>> }
>>
>> @@ -1659,16 +1630,11 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>>
>> static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
>> {
>> - VTDBus *vtd_bus;
>> VTDAddressSpace *vtd_as;
>> bool success = false;
>> + uintptr_t key = (uintptr_t)source_id;
>>
>> - vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
>> - if (!vtd_bus) {
>> - goto out;
>> - }
>> -
>> - vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
>> + vtd_as = g_hash_table_lookup(s->vtd_as, &key);
> I'm slightly confused - what I read below tells me that the key is "struct
> vtd_as_key" at [1] below, but here it's a uintptr_t contains the SID. I don't
> think they match.. Did I misread something?
Nope, it looks like a bug, it looks to me we can't simply use SID here
since the key requires a pointer to PCIBus. The reason that we can't
simply use SID here is that the dma_as is initialized before the guest
can initialize the root port where we may end up with wrong bus number.
I will fix this by using g_hash_table_find() which could be slow but
consider this function is not a frequent operation it should be acceptable.
>
>> if (!vtd_as) {
>> goto out;
>> }
>> @@ -1876,11 +1842,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>> uint16_t source_id,
>> uint16_t func_mask)
>> {
>> + GHashTableIter as_it;
>> uint16_t mask;
>> - VTDBus *vtd_bus;
>> VTDAddressSpace *vtd_as;
>> uint8_t bus_n, devfn;
>> - uint16_t devfn_it;
>>
>> trace_vtd_inv_desc_cc_devices(source_id, func_mask);
>>
>> @@ -1903,32 +1868,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>> mask = ~mask;
>>
>> bus_n = VTD_SID_TO_BUS(source_id);
>> - vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
>> - if (vtd_bus) {
>> - devfn = VTD_SID_TO_DEVFN(source_id);
>> - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
>> - vtd_as = vtd_bus->dev_as[devfn_it];
>> - if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
>> - trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
>> - VTD_PCI_FUNC(devfn_it));
>> - vtd_iommu_lock(s);
>> - vtd_as->context_cache_entry.context_cache_gen = 0;
>> - vtd_iommu_unlock(s);
>> - /*
>> - * Do switch address space when needed, in case if the
>> - * device passthrough bit is switched.
>> - */
>> - vtd_switch_address_space(vtd_as);
>> - /*
>> - * So a device is moving out of (or moving into) a
>> - * domain, resync the shadow page table.
>> - * This won't bring bad even if we have no such
>> - * notifier registered - the IOMMU notification
>> - * framework will skip MAP notifications if that
>> - * happened.
>> - */
>> - vtd_sync_shadow_page_table(vtd_as);
>> - }
>> + devfn = VTD_SID_TO_DEVFN(source_id);
>> +
>> + g_hash_table_iter_init(&as_it, s->vtd_as);
>> + while (g_hash_table_iter_next(&as_it, NULL, (void**)&vtd_as)) {
>> + if ((pci_bus_num(vtd_as->bus) == bus_n) &&
>> + (vtd_as->devfn & mask) == (devfn & mask)) {
>> + trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
>> + VTD_PCI_FUNC(vtd_as->devfn));
>> + vtd_iommu_lock(s);
>> + vtd_as->context_cache_entry.context_cache_gen = 0;
>> + vtd_iommu_unlock(s);
>> + /*
>> + * Do switch address space when needed, in case if the
>> + * device passthrough bit is switched.
>> + */
>> + vtd_switch_address_space(vtd_as);
>> + /*
>> + * So a device is moving out of (or moving into) a
>> + * domain, resync the shadow page table.
>> + * This won't bring bad even if we have no such
>> + * notifier registered - the IOMMU notification
>> + * framework will skip MAP notifications if that
>> + * happened.
>> + */
>> + vtd_sync_shadow_page_table(vtd_as);
>> }
>> }
>> }
>> @@ -2442,18 +2406,14 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>> {
>> VTDAddressSpace *vtd_dev_as;
>> IOMMUTLBEvent event;
>> - struct VTDBus *vtd_bus;
>> + uintptr_t key;
>> hwaddr addr;
>> uint64_t sz;
>> uint16_t sid;
>> - uint8_t devfn;
>> bool size;
>> - uint8_t bus_num;
>>
>> addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>> sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
>> - devfn = sid & 0xff;
>> - bus_num = sid >> 8;
>> size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>>
>> if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
>> @@ -2464,12 +2424,8 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>> return false;
>> }
>>
>> - vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
>> - if (!vtd_bus) {
>> - goto done;
>> - }
>> -
>> - vtd_dev_as = vtd_bus->dev_as[devfn];
>> + key = (uintptr_t)sid;
>> + vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
> Same question here.
See above reply.
Thanks
>
>> if (!vtd_dev_as) {
>> goto done;
>> }
>> @@ -3390,27 +3346,22 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
>>
>> VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>> {
>> - uintptr_t key = (uintptr_t)bus;
>> - VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
>> - VTDAddressSpace *vtd_dev_as;
>> + struct vtd_as_key key = {
>> + .bus = bus,
>> + .devfn = devfn,
>> + };
>> + VTDAddressSpace *vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
>> char name[128];
>>
>> - if (!vtd_bus) {
>> - uintptr_t *new_key = g_malloc(sizeof(*new_key));
>> - *new_key = (uintptr_t)bus;
>> - /* No corresponding free() */
>> - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
>> - PCI_DEVFN_MAX);
>> - vtd_bus->bus = bus;
>> - g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
>> - }
>> + if (!vtd_dev_as) {
>> + struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
>>
>> - vtd_dev_as = vtd_bus->dev_as[devfn];
>> + new_key->bus = bus;
>> + new_key->devfn = devfn;
>>
>> - if (!vtd_dev_as) {
>> snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
>> PCI_FUNC(devfn));
>> - vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
>> + vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
>>
>> vtd_dev_as->bus = bus;
>> vtd_dev_as->devfn = (uint8_t)devfn;
>> @@ -3466,6 +3417,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>> &vtd_dev_as->nodmar, 0);
>>
>> vtd_switch_address_space(vtd_dev_as);
>> +
>> + g_hash_table_insert(s->vtd_as, new_key, vtd_dev_as);
> [1]
>
>> }
>> return vtd_dev_as;
>> }
>> @@ -3750,6 +3703,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>> assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>>
>> vtd_as = vtd_find_add_as(s, bus, devfn);
>> +
>> return &vtd_as->as;
>> }
>>
>> @@ -3835,7 +3789,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>>
>> QLIST_INIT(&s->vtd_as_with_notifiers);
>> qemu_mutex_init(&s->iommu_lock);
>> - memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
>> memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
>> "intel_iommu", DMAR_REG_SIZE);
>>
>> @@ -3857,8 +3810,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>> /* No corresponding destroy */
>> s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
>> g_free, g_free);
>> - s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
>> - g_free, g_free);
>> + s->vtd_as = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
>> + g_free, g_free);
>> vtd_init(s);
>> sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
>> pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 41783ee46d..014d77dc2a 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -58,7 +58,6 @@ typedef struct VTDContextEntry VTDContextEntry;
>> typedef struct VTDContextCacheEntry VTDContextCacheEntry;
>> typedef struct VTDAddressSpace VTDAddressSpace;
>> typedef struct VTDIOTLBEntry VTDIOTLBEntry;
>> -typedef struct VTDBus VTDBus;
>> typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>> @@ -111,12 +110,6 @@ struct VTDAddressSpace {
>> IOVATree *iova_tree; /* Traces mapped IOVA ranges */
>> };
>>
>> -struct VTDBus {
>> - PCIBus* bus; /* A reference to the bus to provide translation for */
>> - /* A table of VTDAddressSpace objects indexed by devfn */
>> - VTDAddressSpace *dev_as[];
>> -};
>> -
>> struct VTDIOTLBEntry {
>> uint64_t gfn;
>> uint16_t domain_id;
>> @@ -252,8 +245,7 @@ struct IntelIOMMUState {
>> uint32_t context_cache_gen; /* Should be in [1,MAX] */
>> GHashTable *iotlb; /* IOTLB */
>>
>> - GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
>> - VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
>> + GHashTable *vtd_as; /* VTD address space indexed by source id*/
>> /* list of registered notifiers */
>> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> --
>> 2.25.1
>>
next prev parent reply other threads:[~2022-01-14 2:33 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-05 4:19 [PATCH 0/3] PASID support for Intel IOMMU Jason Wang
2022-01-05 4:19 ` [PATCH] intel-iommu: correctly check passthrough during translation Jason Wang
2022-01-05 4:19 ` [PATCH 1/3] intel-iommu: don't warn guest errors when getting rid2pasid entry Jason Wang
2022-01-13 3:35 ` Peter Xu
2022-01-13 6:16 ` Jason Wang
2022-01-13 6:32 ` Peter Xu
2022-01-13 7:05 ` Michael S. Tsirkin
2022-01-14 3:02 ` Jason Wang
2022-01-13 7:06 ` Michael S. Tsirkin
2022-01-14 2:56 ` Jason Wang
2022-01-05 4:19 ` [PATCH 2/3] intel-iommu: drop VTDBus Jason Wang
2022-01-13 4:12 ` Peter Xu
2022-01-14 2:32 ` Jason Wang [this message]
2022-01-14 9:15 ` Jason Wang
2022-01-17 1:27 ` Peter Xu
2022-01-17 1:42 ` Peter Xu
2022-01-05 4:19 ` [PATCH 3/3] intel-iommu: PASID support Jason Wang
2022-01-13 5:06 ` Peter Xu
2022-01-13 7:16 ` Michael S. Tsirkin
2022-01-14 2:47 ` Jason Wang
2022-01-14 3:31 ` Peter Xu
2022-01-14 5:58 ` Jason Wang
2022-01-14 7:13 ` Peter Xu
2022-01-14 7:22 ` Jason Wang
2022-01-14 7:45 ` Peter Xu
2022-01-14 9:12 ` Jason Wang
2022-01-14 12:58 ` Liu Yi L
2022-01-17 6:01 ` Jason Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b542cb8d-d1f4-6583-a89e-49dedafc77d4@redhat.com \
--to=jasowang@redhat.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=yi.l.liu@intel.com \
--cc=yi.y.sun@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).