qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
       [not found] ` <1492426712-12230-8-git-send-email-peterx@redhat.com>
@ 2017-04-18  3:23   ` Jason Wang
  2017-04-18  3:50     ` Peter Xu
  0 siblings, 1 reply; 7+ messages in thread
From: Jason Wang @ 2017-04-18  3:23 UTC (permalink / raw)
  To: Peter Xu, qemu-devel
  Cc: David Gibson, Lan Tianyu, Marcel Apfelbaum, Michael S.Tsirkin,
	"  peterx"



On 2017年04月17日 18:58, Peter Xu wrote:
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   hw/i386/intel_iommu.c          | 109 +++++++++++++++++++++++++++++++++--------
>   hw/i386/intel_iommu_internal.h |   1 +
>   hw/i386/trace-events           |   1 +
>   hw/i386/x86-iommu.c            |   1 +
>   include/hw/i386/x86-iommu.h    |   1 +
>   5 files changed, 93 insertions(+), 20 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 05ae631..deb2007 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -872,7 +872,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>       } else {
>           switch (vtd_ce_get_type(ce)) {
>           case VTD_CONTEXT_TT_MULTI_LEVEL:
> -            /* fall through */
> +        case VTD_CONTEXT_TT_PASS_THROUGH:
>           case VTD_CONTEXT_TT_DEV_IOTLB:
>               break;
>           default:
> @@ -883,6 +883,73 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>       return 0;
>   }
>   
> +/* Fetch translation type for specific device. Returns <0 if error
> + * happens, otherwise return the shifted type to check against
> + * VTD_CONTEXT_TT_*. */
> +static int vtd_dev_get_trans_type(VTDAddressSpace *as)
> +{
> +    IntelIOMMUState *s;
> +    VTDContextEntry ce;
> +    int ret;
> +
> +    s = as->iommu_state;
> +
> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
> +                                   as->devfn, &ce);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return vtd_ce_get_type(&ce);
> +}
> +
> +static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
> +{
> +    int ret;
> +
> +    assert(as);
> +
> +    ret = vtd_dev_get_trans_type(as);
> +    if (ret < 0) {
> +        /*
> +         * Possibly failed to parse the context entry for some reason
> +         * (e.g., during init, or any guest configuration errors on
> +         * context entries). We should assume PT not enabled for
> +         * safety.
> +         */
> +        return false;
> +    }
> +
> +    return ret == VTD_CONTEXT_TT_PASS_THROUGH;
> +}
> +
> +static void vtd_switch_address_space(VTDAddressSpace *as)
> +{
> +    bool use_iommu;
> +
> +    assert(as);
> +
> +    use_iommu = as->iommu_state->dmar_enabled;
> +    if (use_iommu) {
> +        /* Further checks per-device configuration */
> +        use_iommu &= !vtd_dev_pt_enabled(as);
> +    }

Looks like you can use as->iommu_state->dmar_enabled && 
!vtd_dev_pt_enabled(as)

> +
> +    trace_vtd_switch_address_space(pci_bus_num(as->bus),
> +                                   VTD_PCI_SLOT(as->devfn),
> +                                   VTD_PCI_FUNC(as->devfn),
> +                                   use_iommu);
> +
> +    /* Turn off first then on the other */
> +    if (use_iommu) {
> +        memory_region_set_enabled(&as->sys_alias, false);
> +        memory_region_set_enabled(&as->iommu, true);
> +    } else {
> +        memory_region_set_enabled(&as->iommu, false);
> +        memory_region_set_enabled(&as->sys_alias, true);
> +    }
> +}
> +
>   static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
>   {
>       return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> @@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>           cc_entry->context_cache_gen = s->context_cache_gen;
>       }
>   
> +    /*
> +     * We don't need to translate for pass-through context entries.
> +     * Also, let's ignore IOTLB caching as well for PT devices.
> +     */
> +    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
> +        entry->translated_addr = entry->iova;
> +        entry->addr_mask = VTD_PAGE_SIZE - 1;
> +        entry->perm = IOMMU_RW;
> +        trace_vtd_translate_pt(source_id, entry->iova);
> +        return;
> +    }

Several questions here:

1) Is this just for vhost?
2) Since this is done after IOTLB querying, do we need flush IOTLB 
during address switching?

> +
>       ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
>                                  &reads, &writes);
>       if (ret_fr) {
> @@ -1135,6 +1214,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>                                                VTD_PCI_FUNC(devfn_it));
>                   vtd_as->context_cache_entry.context_cache_gen = 0;
>                   /*
> +                 * Do switch address space when needed, in case if the
> +                 * device passthrough bit is switched.
> +                 */
> +                vtd_switch_address_space(vtd_as);
> +                /*
>                    * So a device is moving out of (or moving into) a
>                    * domain, a replay() suites here to notify all the
>                    * IOMMU_NOTIFIER_MAP registers about this change.
> @@ -1366,25 +1450,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
>       vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
>   }
>   
> -static void vtd_switch_address_space(VTDAddressSpace *as)
> -{
> -    assert(as);
> -
> -    trace_vtd_switch_address_space(pci_bus_num(as->bus),
> -                                   VTD_PCI_SLOT(as->devfn),
> -                                   VTD_PCI_FUNC(as->devfn),
> -                                   as->iommu_state->dmar_enabled);
> -
> -    /* Turn off first then on the other */
> -    if (as->iommu_state->dmar_enabled) {
> -        memory_region_set_enabled(&as->sys_alias, false);
> -        memory_region_set_enabled(&as->iommu, true);
> -    } else {
> -        memory_region_set_enabled(&as->iommu, false);
> -        memory_region_set_enabled(&as->sys_alias, true);
> -    }
> -}
> -
>   static void vtd_switch_address_space_all(IntelIOMMUState *s)
>   {
>       GHashTableIter iter;
> @@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
>           s->ecap |= VTD_ECAP_DT;
>       }
>   
> +    if (x86_iommu->pt_supported) {
> +        s->ecap |= VTD_ECAP_PT;
> +    }

Since we support migration now, need compat this for pre 2.10.

> +
>       if (s->caching_mode) {
>           s->cap |= VTD_CAP_CM;
>       }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 29d6707..0e73a65 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -187,6 +187,7 @@
>   /* Interrupt Remapping support */
>   #define VTD_ECAP_IR                 (1ULL << 3)
>   #define VTD_ECAP_EIM                (1ULL << 4)
> +#define VTD_ECAP_PT                 (1ULL << 6)
>   #define VTD_ECAP_MHMV               (15ULL << 20)
>   
>   /* CAP_REG */
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 04a6980..867ad0b 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -38,6 +38,7 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P
>   vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
>   vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
>   vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
> +vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
>   
>   # hw/i386/amd_iommu.c
>   amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
> diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
> index 02b8825..293caf8 100644
> --- a/hw/i386/x86-iommu.c
> +++ b/hw/i386/x86-iommu.c
> @@ -91,6 +91,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
>   static Property x86_iommu_properties[] = {
>       DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
>       DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
> +    DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),

Do you know if AMD IOMMU support this?

Thanks

>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
> index 361c07c..ef89c0c 100644
> --- a/include/hw/i386/x86-iommu.h
> +++ b/include/hw/i386/x86-iommu.h
> @@ -74,6 +74,7 @@ struct X86IOMMUState {
>       SysBusDevice busdev;
>       bool intr_supported;        /* Whether vIOMMU supports IR */
>       bool dt_supported;          /* Whether vIOMMU supports DT */
> +    bool pt_supported;          /* Whether vIOMMU supports pass-through */
>       IommuType type;             /* IOMMU type - AMD/Intel     */
>       QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
>   };

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-18  3:23   ` [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT) Jason Wang
@ 2017-04-18  3:50     ` Peter Xu
  2017-04-18  4:00       ` Jason Wang
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Xu @ 2017-04-18  3:50 UTC (permalink / raw)
  To: Jason Wang
  Cc: qemu-devel, David Gibson, Lan Tianyu, Marcel Apfelbaum,
	Michael S.Tsirkin

On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
> On 2017年04月17日 18:58, Peter Xu wrote:

[...]

> >+static void vtd_switch_address_space(VTDAddressSpace *as)
> >+{
> >+    bool use_iommu;
> >+
> >+    assert(as);
> >+
> >+    use_iommu = as->iommu_state->dmar_enabled;
> >+    if (use_iommu) {
> >+        /* Further checks per-device configuration */
> >+        use_iommu &= !vtd_dev_pt_enabled(as);
> >+    }
> 
> Looks like you can use as->iommu_state->dmar_enabled &&
> !vtd_dev_pt_enabled(as)

vtd_dev_pt_enalbed() needs to read the guest memory (starting from
reading root entry), which is slightly slow. I was trying to avoid
unecessary reads.

[...]

> >@@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> >          cc_entry->context_cache_gen = s->context_cache_gen;
> >      }
> >+    /*
> >+     * We don't need to translate for pass-through context entries.
> >+     * Also, let's ignore IOTLB caching as well for PT devices.
> >+     */
> >+    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
> >+        entry->translated_addr = entry->iova;
> >+        entry->addr_mask = VTD_PAGE_SIZE - 1;
> >+        entry->perm = IOMMU_RW;
> >+        trace_vtd_translate_pt(source_id, entry->iova);
> >+        return;
> >+    }
> 
> Several questions here:
> 
> 1) Is this just for vhost?

No. When caching mode is not enabled, all passthroughed devices should
be using this path.

> 2) Since this is done after IOTLB querying, do we need flush IOTLB during
> address switching?

IMHO if guest switches address space for a device, it is required to
send IOTLB flush as well for that device/domain.

[...]

> >  static void vtd_switch_address_space_all(IntelIOMMUState *s)
> >  {
> >      GHashTableIter iter;
> >@@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
> >          s->ecap |= VTD_ECAP_DT;
> >      }
> >+    if (x86_iommu->pt_supported) {
> >+        s->ecap |= VTD_ECAP_PT;
> >+    }
> 
> Since we support migration now, need compat this for pre 2.10.

Oh yes. If I set pt=off by default, it should be okay then, right?

Then, at some point, we can switch to on by default, with a touch-up
in include/hw/compat.h I guess?

> 
> >+
> >      if (s->caching_mode) {
> >          s->cap |= VTD_CAP_CM;
> >      }
> >diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> >index 29d6707..0e73a65 100644
> >--- a/hw/i386/intel_iommu_internal.h
> >+++ b/hw/i386/intel_iommu_internal.h
> >@@ -187,6 +187,7 @@
> >  /* Interrupt Remapping support */
> >  #define VTD_ECAP_IR                 (1ULL << 3)
> >  #define VTD_ECAP_EIM                (1ULL << 4)
> >+#define VTD_ECAP_PT                 (1ULL << 6)
> >  #define VTD_ECAP_MHMV               (15ULL << 20)
> >  /* CAP_REG */
> >diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> >index 04a6980..867ad0b 100644
> >--- a/hw/i386/trace-events
> >+++ b/hw/i386/trace-events
> >@@ -38,6 +38,7 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P
> >  vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
> >  vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
> >  vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
> >+vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
> >  # hw/i386/amd_iommu.c
> >  amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
> >diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
> >index 02b8825..293caf8 100644
> >--- a/hw/i386/x86-iommu.c
> >+++ b/hw/i386/x86-iommu.c
> >@@ -91,6 +91,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
> >  static Property x86_iommu_properties[] = {
> >      DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
> >      DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
> >+    DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
> 
> Do you know if AMD IOMMU support this?

AMD IOMMU should support this. IIUC it's the first bit of Device Table Entry.

Thanks,

-- 
Peter Xu

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-18  3:50     ` Peter Xu
@ 2017-04-18  4:00       ` Jason Wang
  2017-04-18  4:21         ` Peter Xu
  0 siblings, 1 reply; 7+ messages in thread
From: Jason Wang @ 2017-04-18  4:00 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, David Gibson, Lan Tianyu, Marcel Apfelbaum,
	Michael S.Tsirkin



On 2017年04月18日 11:50, Peter Xu wrote:
> On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
>> On 2017年04月17日 18:58, Peter Xu wrote:
> [...]
>
>>> +static void vtd_switch_address_space(VTDAddressSpace *as)
>>> +{
>>> +    bool use_iommu;
>>> +
>>> +    assert(as);
>>> +
>>> +    use_iommu = as->iommu_state->dmar_enabled;
>>> +    if (use_iommu) {
>>> +        /* Further checks per-device configuration */
>>> +        use_iommu &= !vtd_dev_pt_enabled(as);
>>> +    }
>> Looks like you can use as->iommu_state->dmar_enabled &&
>> !vtd_dev_pt_enabled(as)
> vtd_dev_pt_enalbed() needs to read the guest memory (starting from
> reading root entry), which is slightly slow. I was trying to avoid
> unecessary reads.
>
> [...]

I think compiler won't go to vtd_dev_pt_enabled() if dmar_enabled is false.

>
>>> @@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>>           cc_entry->context_cache_gen = s->context_cache_gen;
>>>       }
>>> +    /*
>>> +     * We don't need to translate for pass-through context entries.
>>> +     * Also, let's ignore IOTLB caching as well for PT devices.
>>> +     */
>>> +    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
>>> +        entry->translated_addr = entry->iova;
>>> +        entry->addr_mask = VTD_PAGE_SIZE - 1;
>>> +        entry->perm = IOMMU_RW;
>>> +        trace_vtd_translate_pt(source_id, entry->iova);
>>> +        return;
>>> +    }
>> Several questions here:
>>
>> 1) Is this just for vhost?
> No. When caching mode is not enabled, all passthroughed devices should
> be using this path.

Ok, then it looks better to switch the address space if we've found it 
was PT?

>
>> 2) Since this is done after IOTLB querying, do we need flush IOTLB during
>> address switching?
> IMHO if guest switches address space for a device, it is required to
> send IOTLB flush as well for that device/domain.
>
> [...]

Ok.

>
>>>   static void vtd_switch_address_space_all(IntelIOMMUState *s)
>>>   {
>>>       GHashTableIter iter;
>>> @@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
>>>           s->ecap |= VTD_ECAP_DT;
>>>       }
>>> +    if (x86_iommu->pt_supported) {
>>> +        s->ecap |= VTD_ECAP_PT;
>>> +    }
>> Since we support migration now, need compat this for pre 2.10.
> Oh yes. If I set pt=off by default, it should be okay then, right?

Right, but I think it's better to keep this on by default for 
performance reason.

>
> Then, at some point, we can switch to on by default, with a touch-up
> in include/hw/compat.h I guess?

Yes.

Thanks
>
>>> +
>>>       if (s->caching_mode) {
>>>           s->cap |= VTD_CAP_CM;
>>>       }
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>> index 29d6707..0e73a65 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -187,6 +187,7 @@
>>>   /* Interrupt Remapping support */
>>>   #define VTD_ECAP_IR                 (1ULL << 3)
>>>   #define VTD_ECAP_EIM                (1ULL << 4)
>>> +#define VTD_ECAP_PT                 (1ULL << 6)
>>>   #define VTD_ECAP_MHMV               (15ULL << 20)
>>>   /* CAP_REG */
>>> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
>>> index 04a6980..867ad0b 100644
>>> --- a/hw/i386/trace-events
>>> +++ b/hw/i386/trace-events
>>> @@ -38,6 +38,7 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P
>>>   vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
>>>   vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
>>>   vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
>>> +vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
>>>   # hw/i386/amd_iommu.c
>>>   amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
>>> diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
>>> index 02b8825..293caf8 100644
>>> --- a/hw/i386/x86-iommu.c
>>> +++ b/hw/i386/x86-iommu.c
>>> @@ -91,6 +91,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
>>>   static Property x86_iommu_properties[] = {
>>>       DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
>>>       DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
>>> +    DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
>> Do you know if AMD IOMMU support this?
> AMD IOMMU should support this. IIUC it's the first bit of Device Table Entry.
>
> Thanks,
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-18  4:00       ` Jason Wang
@ 2017-04-18  4:21         ` Peter Xu
  2017-04-20  5:18           ` Jason Wang
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Xu @ 2017-04-18  4:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: qemu-devel, David Gibson, Lan Tianyu, Marcel Apfelbaum,
	Michael S.Tsirkin

On Tue, Apr 18, 2017 at 12:00:13PM +0800, Jason Wang wrote:
> 
> 
> On 2017年04月18日 11:50, Peter Xu wrote:
> >On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
> >>On 2017年04月17日 18:58, Peter Xu wrote:
> >[...]
> >
> >>>+static void vtd_switch_address_space(VTDAddressSpace *as)
> >>>+{
> >>>+    bool use_iommu;
> >>>+
> >>>+    assert(as);
> >>>+
> >>>+    use_iommu = as->iommu_state->dmar_enabled;
> >>>+    if (use_iommu) {
> >>>+        /* Further checks per-device configuration */
> >>>+        use_iommu &= !vtd_dev_pt_enabled(as);
> >>>+    }
> >>Looks like you can use as->iommu_state->dmar_enabled &&
> >>!vtd_dev_pt_enabled(as)
> >vtd_dev_pt_enalbed() needs to read the guest memory (starting from
> >reading root entry), which is slightly slow. I was trying to avoid
> >unecessary reads.
> >
> >[...]
> 
> I think compiler won't go to vtd_dev_pt_enabled() if dmar_enabled is false.

You are right. I'll switch.

> 
> >
> >>>@@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> >>>          cc_entry->context_cache_gen = s->context_cache_gen;
> >>>      }
> >>>+    /*
> >>>+     * We don't need to translate for pass-through context entries.
> >>>+     * Also, let's ignore IOTLB caching as well for PT devices.
> >>>+     */
> >>>+    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
> >>>+        entry->translated_addr = entry->iova;
> >>>+        entry->addr_mask = VTD_PAGE_SIZE - 1;
> >>>+        entry->perm = IOMMU_RW;
> >>>+        trace_vtd_translate_pt(source_id, entry->iova);
> >>>+        return;
> >>>+    }
> >>Several questions here:
> >>
> >>1) Is this just for vhost?
> >No. When caching mode is not enabled, all passthroughed devices should
> >be using this path.
> 
> Ok, then it looks better to switch the address space if we've found it was
> PT?

Do you mean to switch in that if() above? Then when invalidate context
entry, we switch back if needed?

> 
> >
> >>2) Since this is done after IOTLB querying, do we need flush IOTLB during
> >>address switching?
> >IMHO if guest switches address space for a device, it is required to
> >send IOTLB flush as well for that device/domain.
> >
> >[...]
> 
> Ok.
> 
> >
> >>>  static void vtd_switch_address_space_all(IntelIOMMUState *s)
> >>>  {
> >>>      GHashTableIter iter;
> >>>@@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
> >>>          s->ecap |= VTD_ECAP_DT;
> >>>      }
> >>>+    if (x86_iommu->pt_supported) {
> >>>+        s->ecap |= VTD_ECAP_PT;
> >>>+    }
> >>Since we support migration now, need compat this for pre 2.10.
> >Oh yes. If I set pt=off by default, it should be okay then, right?
> 
> Right, but I think it's better to keep this on by default for performance
> reason.

Okay. Just to confirm, that'll need one entry for HW_COMPAT_2_9,
right? (though it is still not there)

-- 
Peter Xu

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-18  4:21         ` Peter Xu
@ 2017-04-20  5:18           ` Jason Wang
  2017-04-20  5:28             ` Peter Xu
  0 siblings, 1 reply; 7+ messages in thread
From: Jason Wang @ 2017-04-20  5:18 UTC (permalink / raw)
  To: Peter Xu
  Cc: qemu-devel, David Gibson, Lan Tianyu, Marcel Apfelbaum,
	Michael S.Tsirkin



On 2017年04月18日 12:21, Peter Xu wrote:
> On Tue, Apr 18, 2017 at 12:00:13PM +0800, Jason Wang wrote:
>>
>> On 2017年04月18日 11:50, Peter Xu wrote:
>>> On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
>>>> On 2017年04月17日 18:58, Peter Xu wrote:
>>> [...]
>>>
>>>>> +static void vtd_switch_address_space(VTDAddressSpace *as)
>>>>> +{
>>>>> +    bool use_iommu;
>>>>> +
>>>>> +    assert(as);
>>>>> +
>>>>> +    use_iommu = as->iommu_state->dmar_enabled;
>>>>> +    if (use_iommu) {
>>>>> +        /* Further checks per-device configuration */
>>>>> +        use_iommu &= !vtd_dev_pt_enabled(as);
>>>>> +    }
>>>> Looks like you can use as->iommu_state->dmar_enabled &&
>>>> !vtd_dev_pt_enabled(as)
>>> vtd_dev_pt_enalbed() needs to read the guest memory (starting from
>>> reading root entry), which is slightly slow. I was trying to avoid
>>> unecessary reads.
>>>
>>> [...]
>> I think compiler won't go to vtd_dev_pt_enabled() if dmar_enabled is false.
> You are right. I'll switch.
>
>>>>> @@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>>>>           cc_entry->context_cache_gen = s->context_cache_gen;
>>>>>       }
>>>>> +    /*
>>>>> +     * We don't need to translate for pass-through context entries.
>>>>> +     * Also, let's ignore IOTLB caching as well for PT devices.
>>>>> +     */
>>>>> +    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
>>>>> +        entry->translated_addr = entry->iova;
>>>>> +        entry->addr_mask = VTD_PAGE_SIZE - 1;
>>>>> +        entry->perm = IOMMU_RW;
>>>>> +        trace_vtd_translate_pt(source_id, entry->iova);
>>>>> +        return;
>>>>> +    }
>>>> Several questions here:
>>>>
>>>> 1) Is this just for vhost?
>>> No. When caching mode is not enabled, all passthroughed devices should
>>> be using this path.
>> Ok, then it looks better to switch the address space if we've found it was
>> PT?
> Do you mean to switch in that if() above? Then when invalidate context
> entry, we switch back if needed?

Yes.


>
>>>> 2) Since this is done after IOTLB querying, do we need flush IOTLB during
>>>> address switching?
>>> IMHO if guest switches address space for a device, it is required to
>>> send IOTLB flush as well for that device/domain.
>>>
>>> [...]
>> Ok.
>>
>>>>>   static void vtd_switch_address_space_all(IntelIOMMUState *s)
>>>>>   {
>>>>>       GHashTableIter iter;
>>>>> @@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
>>>>>           s->ecap |= VTD_ECAP_DT;
>>>>>       }
>>>>> +    if (x86_iommu->pt_supported) {
>>>>> +        s->ecap |= VTD_ECAP_PT;
>>>>> +    }
>>>> Since we support migration now, need compat this for pre 2.10.
>>> Oh yes. If I set pt=off by default, it should be okay then, right?
>> Right, but I think it's better to keep this on by default for performance
>> reason.
> Okay. Just to confirm, that'll need one entry for HW_COMPAT_2_9,
> right? (though it is still not there)
>

Right.

Thanks

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-20  5:18           ` Jason Wang
@ 2017-04-20  5:28             ` Peter Xu
  2017-04-20  6:05               ` Jason Wang
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Xu @ 2017-04-20  5:28 UTC (permalink / raw)
  To: Jason Wang
  Cc: qemu-devel, David Gibson, Lan Tianyu, Marcel Apfelbaum,
	Michael S.Tsirkin

On Thu, Apr 20, 2017 at 01:18:28PM +0800, Jason Wang wrote:
> 
> 
> On 2017年04月18日 12:21, Peter Xu wrote:
> >On Tue, Apr 18, 2017 at 12:00:13PM +0800, Jason Wang wrote:
> >>
> >>On 2017年04月18日 11:50, Peter Xu wrote:
> >>>On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
> >>>>On 2017年04月17日 18:58, Peter Xu wrote:
> >>>[...]
> >>>
> >>>>>+static void vtd_switch_address_space(VTDAddressSpace *as)
> >>>>>+{
> >>>>>+    bool use_iommu;
> >>>>>+
> >>>>>+    assert(as);
> >>>>>+
> >>>>>+    use_iommu = as->iommu_state->dmar_enabled;
> >>>>>+    if (use_iommu) {
> >>>>>+        /* Further checks per-device configuration */
> >>>>>+        use_iommu &= !vtd_dev_pt_enabled(as);
> >>>>>+    }
> >>>>Looks like you can use as->iommu_state->dmar_enabled &&
> >>>>!vtd_dev_pt_enabled(as)
> >>>vtd_dev_pt_enalbed() needs to read the guest memory (starting from
> >>>reading root entry), which is slightly slow. I was trying to avoid
> >>>unecessary reads.
> >>>
> >>>[...]
> >>I think compiler won't go to vtd_dev_pt_enabled() if dmar_enabled is false.
> >You are right. I'll switch.
> >
> >>>>>@@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> >>>>>          cc_entry->context_cache_gen = s->context_cache_gen;
> >>>>>      }
> >>>>>+    /*
> >>>>>+     * We don't need to translate for pass-through context entries.
> >>>>>+     * Also, let's ignore IOTLB caching as well for PT devices.
> >>>>>+     */
> >>>>>+    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
> >>>>>+        entry->translated_addr = entry->iova;
> >>>>>+        entry->addr_mask = VTD_PAGE_SIZE - 1;
> >>>>>+        entry->perm = IOMMU_RW;
> >>>>>+        trace_vtd_translate_pt(source_id, entry->iova);
> >>>>>+        return;
> >>>>>+    }
> >>>>Several questions here:
> >>>>
> >>>>1) Is this just for vhost?
> >>>No. When caching mode is not enabled, all passthroughed devices should
> >>>be using this path.
> >>Ok, then it looks better to switch the address space if we've found it was
> >>PT?
> >Do you mean to switch in that if() above? Then when invalidate context
> >entry, we switch back if needed?
> 
> Yes.

Sure. Do you mind if I put this into another standalone patch? I see
it an enhancement that can be separated from current one. Thanks,

-- 
Peter Xu

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT)
  2017-04-20  5:28             ` Peter Xu
@ 2017-04-20  6:05               ` Jason Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Jason Wang @ 2017-04-20  6:05 UTC (permalink / raw)
  To: Peter Xu
  Cc: Lan Tianyu, Marcel Apfelbaum, Michael S.Tsirkin, qemu-devel,
	David Gibson



On 2017年04月20日 13:28, Peter Xu wrote:
> On Thu, Apr 20, 2017 at 01:18:28PM +0800, Jason Wang wrote:
>>
>> On 2017年04月18日 12:21, Peter Xu wrote:
>>> On Tue, Apr 18, 2017 at 12:00:13PM +0800, Jason Wang wrote:
>>>> On 2017年04月18日 11:50, Peter Xu wrote:
>>>>> On Tue, Apr 18, 2017 at 11:23:35AM +0800, Jason Wang wrote:
>>>>>> On 2017年04月17日 18:58, Peter Xu wrote:
>>>>> [...]
>>>>>
>>>>>>> +static void vtd_switch_address_space(VTDAddressSpace *as)
>>>>>>> +{
>>>>>>> +    bool use_iommu;
>>>>>>> +
>>>>>>> +    assert(as);
>>>>>>> +
>>>>>>> +    use_iommu = as->iommu_state->dmar_enabled;
>>>>>>> +    if (use_iommu) {
>>>>>>> +        /* Further checks per-device configuration */
>>>>>>> +        use_iommu &= !vtd_dev_pt_enabled(as);
>>>>>>> +    }
>>>>>> Looks like you can use as->iommu_state->dmar_enabled &&
>>>>>> !vtd_dev_pt_enabled(as)
>>>>> vtd_dev_pt_enalbed() needs to read the guest memory (starting from
>>>>> reading root entry), which is slightly slow. I was trying to avoid
>>>>> unecessary reads.
>>>>>
>>>>> [...]
>>>> I think compiler won't go to vtd_dev_pt_enabled() if dmar_enabled is false.
>>> You are right. I'll switch.
>>>
>>>>>>> @@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>>>>>>           cc_entry->context_cache_gen = s->context_cache_gen;
>>>>>>>       }
>>>>>>> +    /*
>>>>>>> +     * We don't need to translate for pass-through context entries.
>>>>>>> +     * Also, let's ignore IOTLB caching as well for PT devices.
>>>>>>> +     */
>>>>>>> +    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
>>>>>>> +        entry->translated_addr = entry->iova;
>>>>>>> +        entry->addr_mask = VTD_PAGE_SIZE - 1;
>>>>>>> +        entry->perm = IOMMU_RW;
>>>>>>> +        trace_vtd_translate_pt(source_id, entry->iova);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>> Several questions here:
>>>>>>
>>>>>> 1) Is this just for vhost?
>>>>> No. When caching mode is not enabled, all passthroughed devices should
>>>>> be using this path.
>>>> Ok, then it looks better to switch the address space if we've found it was
>>>> PT?
>>> Do you mean to switch in that if() above? Then when invalidate context
>>> entry, we switch back if needed?
>> Yes.
> Sure. Do you mind if I put this into another standalone patch? I see
> it an enhancement that can be separated from current one. Thanks,
>

I don't mind. It can be done on top.

Thanks

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-04-20  6:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1492426712-12230-1-git-send-email-peterx@redhat.com>
     [not found] ` <1492426712-12230-8-git-send-email-peterx@redhat.com>
2017-04-18  3:23   ` [Qemu-devel] [PATCH 7/7] intel_iommu: support passthrough (PT) Jason Wang
2017-04-18  3:50     ` Peter Xu
2017-04-18  4:00       ` Jason Wang
2017-04-18  4:21         ` Peter Xu
2017-04-20  5:18           ` Jason Wang
2017-04-20  5:28             ` Peter Xu
2017-04-20  6:05               ` Jason Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).