From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>,
	Michael Roth <mdroth@linux.vnet.ibm.com>,
	qemu-devel@nongnu.org, qemu-ppc@nongnu.org,
	Paolo Bonzini <pbonzini@redhat.com>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [Qemu-devel] [RFC PATCH qemu v3 2/4] vfio: Use different page size for different IOMMU types
Date: Thu, 16 Jul 2015 13:31:36 +1000
Message-ID: <55A72598.7010408@ozlabs.ru>
In-Reply-To: <1437015109.1391.540.camel@redhat.com>

On 07/16/2015 12:51 PM, Alex Williamson wrote:
> On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote:
>> On 07/16/2015 04:26 AM, Alex Williamson wrote:
>>> On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
>>>> The existing memory listener is called on the RAM or the PCI address
>>>> space, which implies potentially different page sizes. Instead of
>>>> guessing what page size should be used, this replaces the single IOMMU
>>>> memory listener with two, one per supported IOMMU type; the listener
>>>> callbacks call the existing helpers with a known page size.
>>>>
>>>> For now, Type1 uses qemu_real_host_page_mask and sPAPR uses the page
>>>> size from the IOMMU.
>>>>
>>>> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
>>>> this adds a memory_region_is_iommu() check to the SPAPR IOMMU listener
>>>> to skip non-IOMMU regions (such as the MSIX window), which duplicates
>>>> vfio_listener_skipped_section() a bit.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>>    hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
>>>>    1 file changed, 76 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 85ee9b0..aad41e1 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -312,11 +312,11 @@ out:
>>>>        rcu_read_unlock();
>>>>    }
>>>>
>>>> -static void vfio_listener_region_add(MemoryListener *listener,
>>>> +static void vfio_listener_region_add(VFIOContainer *container,
>>>> +                                     hwaddr page_mask,
>>>
>>> Should memory_region_iommu_get_page_sizes() return a hwaddr?
>>
>>
>> I do not think so; memory.c uses uint64_t for masks.
>>
>>
>>
>>>> +                                     MemoryListener *listener,
>>>>                                         MemoryRegionSection *section)
>>>>    {
>>>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> -                                            iommu_data.type1.listener);
>>>>        hwaddr iova, end;
>>>>        Int128 llend;
>>>>        void *vaddr;
>>>> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>            return;
>>>>        }
>>>>
>>>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> +                 (section->offset_within_region & ~page_mask))) {
>>>>            error_report("%s received unaligned region", __func__);
>>>>            return;
>>>>        }
>>>>
>>>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>>        llend = int128_make64(section->offset_within_address_space);
>>>>        llend = int128_add(llend, section->size);
>>>> -    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>>>> +    llend = int128_and(llend, int128_exts64(page_mask));
>>>>
>>>>        if (int128_ge(int128_make64(iova), llend)) {
>>>>            return;
>>>> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>        }
>>>>    }
>>>>
>>>> -static void vfio_listener_region_del(MemoryListener *listener,
>>>> +static void vfio_listener_region_del(VFIOContainer *container,
>>>> +                                     hwaddr page_mask,
>>>> +                                     MemoryListener *listener,
>>>>                                         MemoryRegionSection *section)
>>>>    {
>>>> -    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> -                                            iommu_data.type1.listener);
>>>>        hwaddr iova, end;
>>>>        int ret;
>>>>
>>>> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>            return;
>>>>        }
>>>>
>>>> -    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> -                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> +    if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> +                 (section->offset_within_region & ~page_mask))) {
>>>>            error_report("%s received unaligned region", __func__);
>>>>            return;
>>>>        }
>>>> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>             */
>>>>        }
>>>>
>>>> -    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> +    iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>>        end = (section->offset_within_address_space + int128_get64(section->size)) &
>>>> -          TARGET_PAGE_MASK;
>>>> +          page_mask;
>>>>
>>>>        if (iova >= end) {
>>>>            return;
>>>> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>>        }
>>>>    }
>>>>
>>>> -static const MemoryListener vfio_memory_listener = {
>>>> -    .region_add = vfio_listener_region_add,
>>>> -    .region_del = vfio_listener_region_del,
>>>> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
>>>> +                                                 MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> +                                            iommu_data.type1.listener);
>>>> +
>>>> +    vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
>>>> +                             section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
>>>> +                                                 MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> +                                            iommu_data.type1.listener);
>>>> +
>>>> +    vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
>>>> +                             section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_type1_iommu_listener = {
>>>> +    .region_add = vfio_type1_iommu_listener_region_add,
>>>> +    .region_del = vfio_type1_iommu_listener_region_del,
>>>> +};
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
>>>> +                                     MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container;
>>>> +    hwaddr page_mask;
>>>> +
>>>> +    if (!memory_region_is_iommu(section->mr)) {
>>>> +        return;
>>>> +    }
>>>> +    container = container_of(listener, VFIOContainer,
>>>> +                             iommu_data.type1.listener);
>>>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> +    vfio_listener_region_add(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
>>>> +                                     MemoryRegionSection *section)
>>>> +{
>>>> +    VFIOContainer *container;
>>>> +    hwaddr page_mask;
>>>> +
>>>> +    if (!memory_region_is_iommu(section->mr)) {
>>>> +        return;
>>>> +    }
>>>> +    container = container_of(listener, VFIOContainer,
>>>> +                             iommu_data.type1.listener);
>>>> +    page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> +    vfio_listener_region_del(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_spapr_iommu_listener = {
>>>> +    .region_add = vfio_spapr_iommu_listener_region_add,
>>>> +    .region_del = vfio_spapr_iommu_listener_region_del,
>>>>    };
>>>
>>>
>>> Rather than creating all these wrappers, why don't we create a structure
>>> that gives us callbacks we can use for a shared function?  If we had:
>>>
>>> struct VFIOMemoryListener {
>>> 	struct MemoryListener listener;
>>> 	bool (*filter)(MemoryRegionSection *section);
>>> 	hwaddr (*page_size)(MemoryRegionSection *section);
>>> 	VFIOContainer *container;
>>> };
>>>
>>> Then there would be no reason for you to have separate wrappers for
>>> spapr.  VFIOType1 would have a single VFIOMemoryListener, you could have
>>> an array of two.
>>
>> Sorry, I am missing the point here...
>>
>> I cannot just have an array of these - I would have to store an address
>> space per listener in the container to implement filter(), so I'd rather
>> store just an address space and not add filter() at all. And I will still
>
> You're not storing an address space now.


I am, indirectly: the listener is registered with an address space filter
which is never NULL and which is passed in at registration time. If not
for that, the whole thing would be simpler. The rest is quite clear now,
you are right.
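For reference, this is the registration I mean - the same call
vfio_connect_container() already makes, where the second argument acts
as the address space filter:

/* The AddressSpace passed here acts as the listener's filter, so the
 * address space is effectively stored alongside the listener already. */
memory_listener_register(&container->iommu_data.type1.listener,
                         container->space->as);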



>> need "memory: Add reporting of supported page sizes" - or there is no way
>
> Just like you do now.
>
>> to implement it all. So I still end up having 6 callbacks in
>> hw/vfio/common.c. What does this buy us?
>
> Nothing you're suggesting adds any more callbacks.
>
> The current filter is vfio_listener_skipped_section().  The iommu filter
> is simply:
>
> bool vfio_iommu_listener_skipped_section(mr)
> {
>    return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr);
> }
>
> Notice how I didn't have to call it spapr because it's probably what any
> guest iommu is going to do...
>
> The v2 spapr filter is:
>
> bool vfio_nodump_listener_skipped_section(mr)
> {
>    return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr);
> }
>
> Notice again that we can use generic functions and not make everything
> spapr specific.


Ok, so we add 2 (not 3), as vfio_listener_skipped_section() is still there.
You got +1.
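Spelled out with the actual helper signature, the two filters would look
roughly like this (a sketch; vfio_listener_skipped_section() is the
existing helper in hw/vfio/common.c):

static bool vfio_iommu_listener_skipped_section(MemoryRegionSection *section)
{
    return vfio_listener_skipped_section(section) ||
           !memory_region_is_iommu(section->mr);
}

static bool vfio_nodump_listener_skipped_section(MemoryRegionSection *section)
{
    return vfio_listener_skipped_section(section) ||
           memory_region_is_skip_dump(section->mr);
}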


> The page_size callbacks are similarly trivial, the iommu one obviously
> calls memory_region_iommu_get_page_sizes, the other one uses
> ~qemu_real_host_page_mask.  Again, nothing spapr specific.


These are trivial but I need all 3 - one for the type1 IOMMU (real host
page size), one for the spapr IOMMU (TCE page size) and one for spapr
memory preregistration (real host page size again).
Ok, I can reuse the 1st for the 3rd and get one less. +2.
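Something like this is what I have in mind (a sketch only - the names are
made up, and memory_region_iommu_get_page_sizes() comes from patch 1/4 of
this series):

/* type1 and spapr memory preregistration: the real host page size */
static hwaddr vfio_host_page_size(MemoryRegionSection *section)
{
    return qemu_real_host_page_size;
}

/* spapr IOMMU: the TCE page size of the guest-visible IOMMU region */
static hwaddr vfio_iommu_page_size(MemoryRegionSection *section)
{
    return memory_region_iommu_get_page_sizes(section->mr);
}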


> The container field of course points back to the container, and now
> vfio_listener_region_add() simply does:
>
> VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener, listener);
> VFIOContainer *container = vlistener->container;
> hwaddr page_mask = ~(vlistener->page_size(section) - 1);
>
> if (vlistener->filter && vlistener->filter(section)) {
>      return;
> }
> ...
>
> And suddenly we get to piece together a handful of _generic_ helper
> functions into a VFIOMemoryListener and we're not adding a new pair of
> wrappers for everything and duplicating trivial setup.  Thanks,

Ok, we are saving 2 callbacks and making the others smaller. Makes sense.
Thanks for walking me through this stuff :)
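So for spapr v2 the container would end up carrying two of these, along
these lines (a sketch only - the listeners[] field is made up, the filter
and page_size helpers are the ones sketched above, and I assume
vfio_memory_listener comes back as the generic region_add/region_del pair):

static void vfio_spapr_listeners_register(VFIOContainer *container)
{
    /* Guest IOMMU windows: map/unmap with the TCE page size */
    container->listeners[0] = (VFIOMemoryListener) {
        .listener  = vfio_memory_listener,
        .filter    = vfio_iommu_listener_skipped_section,
        .page_size = vfio_iommu_page_size,
        .container = container,
    };
    /* Memory preregistration: RAM, at the real host page size */
    container->listeners[1] = (VFIOMemoryListener) {
        .listener  = vfio_memory_listener,
        .filter    = vfio_nodump_listener_skipped_section,
        .page_size = vfio_host_page_size,
        .container = container,
    };

    memory_listener_register(&container->listeners[0].listener,
                             container->space->as);
    /* Preregistration would presumably watch RAM directly */
    memory_listener_register(&container->listeners[1].listener,
                             &address_space_memory);
}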


>
> Alex
>
>>>>
>>>>    static void vfio_listener_release(VFIOContainer *container)
>>>> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>>                goto free_container_exit;
>>>>            }
>>>>
>>>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>>>> +        container->iommu_data.type1.listener = vfio_type1_iommu_listener;
>>>>            container->iommu_data.release = vfio_listener_release;
>>>>
>>>>            memory_listener_register(&container->iommu_data.type1.listener,
>>>> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>>                goto free_container_exit;
>>>>            }
>>>>
>>>> -        container->iommu_data.type1.listener = vfio_memory_listener;
>>>> +        container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
>>>>            container->iommu_data.release = vfio_listener_release;
>>>>
>>>>            memory_listener_register(&container->iommu_data.type1.listener,
>>>
>>>
>>>
>>
>>
>
>
>


-- 
Alexey
