From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>,
Michael Roth <mdroth@linux.vnet.ibm.com>,
qemu-devel@nongnu.org, qemu-ppc@nongnu.org,
Paolo Bonzini <pbonzini@redhat.com>,
David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [Qemu-devel] [RFC PATCH qemu v3 2/4] vfio: Use different page size for different IOMMU types
Date: Thu, 16 Jul 2015 13:31:36 +1000 [thread overview]
Message-ID: <55A72598.7010408@ozlabs.ru> (raw)
In-Reply-To: <1437015109.1391.540.camel@redhat.com>
On 07/16/2015 12:51 PM, Alex Williamson wrote:
> On Thu, 2015-07-16 at 11:26 +1000, Alexey Kardashevskiy wrote:
>> On 07/16/2015 04:26 AM, Alex Williamson wrote:
>>> On Tue, 2015-07-14 at 22:21 +1000, Alexey Kardashevskiy wrote:
>>>> The existing memory listener is called on RAM or PCI address space
>>>> which implies potentially different page size. Instead of guessing
>>>> what page size should be used, this replaces a single IOMMU memory
>>>> listener by two, one per supported IOMMU type; listener callbacks
>>>> call the existing helpers with a known page size.
>>>>
>>>> For now, Type1 uses qemu_real_host_page_mask, sPAPR uses the page size
>>>> from IOMMU.
>>>>
>>>> As memory_region_iommu_get_page_sizes() asserts on non-IOMMU regions,
>>>> this adds memory_region_is_iommu() to the SPAPR IOMMU listener to skip
>>>> non IOMMU regions (which is an MSIX window) which duplicates
>>>> vfio_listener_skipped_section() a bit.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>> hw/vfio/common.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------------
>>>> 1 file changed, 76 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 85ee9b0..aad41e1 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -312,11 +312,11 @@ out:
>>>> rcu_read_unlock();
>>>> }
>>>>
>>>> -static void vfio_listener_region_add(MemoryListener *listener,
>>>> +static void vfio_listener_region_add(VFIOContainer *container,
>>>> + hwaddr page_mask,
>>>
>>> Should memory_region_iommu_get_page_sizes() return a hwaddr?
>>
>>
>> I do not think so, memory.c uses uint64_t for masks.
>>
>>
>>
>>>> + MemoryListener *listener,
>>>> MemoryRegionSection *section)
>>>> {
>>>> - VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> - iommu_data.type1.listener);
>>>> hwaddr iova, end;
>>>> Int128 llend;
>>>> void *vaddr;
>>>> @@ -330,16 +330,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>> return;
>>>> }
>>>>
>>>> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> - (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> + if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> + (section->offset_within_region & ~page_mask))) {
>>>> error_report("%s received unaligned region", __func__);
>>>> return;
>>>> }
>>>>
>>>> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>> llend = int128_make64(section->offset_within_address_space);
>>>> llend = int128_add(llend, section->size);
>>>> - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>>>> + llend = int128_and(llend, int128_exts64(page_mask));
>>>>
>>>> if (int128_ge(int128_make64(iova), llend)) {
>>>> return;
>>>> @@ -416,11 +416,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>> }
>>>> }
>>>>
>>>> -static void vfio_listener_region_del(MemoryListener *listener,
>>>> +static void vfio_listener_region_del(VFIOContainer *container,
>>>> + hwaddr page_mask,
>>>> + MemoryListener *listener,
>>>> MemoryRegionSection *section)
>>>> {
>>>> - VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> - iommu_data.type1.listener);
>>>> hwaddr iova, end;
>>>> int ret;
>>>>
>>>> @@ -432,8 +432,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>> return;
>>>> }
>>>>
>>>> - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>>>> - (section->offset_within_region & ~TARGET_PAGE_MASK))) {
>>>> + if (unlikely((section->offset_within_address_space & ~page_mask) !=
>>>> + (section->offset_within_region & ~page_mask))) {
>>>> error_report("%s received unaligned region", __func__);
>>>> return;
>>>> }
>>>> @@ -459,9 +459,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>> */
>>>> }
>>>>
>>>> - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>>>> + iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
>>>> end = (section->offset_within_address_space + int128_get64(section->size)) &
>>>> - TARGET_PAGE_MASK;
>>>> + page_mask;
>>>>
>>>> if (iova >= end) {
>>>> return;
>>>> @@ -478,9 +478,66 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>>> }
>>>> }
>>>>
>>>> -static const MemoryListener vfio_memory_listener = {
>>>> - .region_add = vfio_listener_region_add,
>>>> - .region_del = vfio_listener_region_del,
>>>> +static void vfio_type1_iommu_listener_region_add(MemoryListener *listener,
>>>> + MemoryRegionSection *section)
>>>> +{
>>>> + VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> + iommu_data.type1.listener);
>>>> +
>>>> + vfio_listener_region_add(container, qemu_real_host_page_mask, listener,
>>>> + section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_type1_iommu_listener_region_del(MemoryListener *listener,
>>>> + MemoryRegionSection *section)
>>>> +{
>>>> + VFIOContainer *container = container_of(listener, VFIOContainer,
>>>> + iommu_data.type1.listener);
>>>> +
>>>> + vfio_listener_region_del(container, qemu_real_host_page_mask, listener,
>>>> + section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_type1_iommu_listener = {
>>>> + .region_add = vfio_type1_iommu_listener_region_add,
>>>> + .region_del = vfio_type1_iommu_listener_region_del,
>>>> +};
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_add(MemoryListener *listener,
>>>> + MemoryRegionSection *section)
>>>> +{
>>>> + VFIOContainer *container;
>>>> + hwaddr page_mask;
>>>> +
>>>> + if (!memory_region_is_iommu(section->mr)) {
>>>> + return;
>>>> + }
>>>> + container = container_of(listener, VFIOContainer,
>>>> + iommu_data.type1.listener);
>>>> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> + vfio_listener_region_add(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +
>>>> +static void vfio_spapr_iommu_listener_region_del(MemoryListener *listener,
>>>> + MemoryRegionSection *section)
>>>> +{
>>>> + VFIOContainer *container;
>>>> + hwaddr page_mask;
>>>> +
>>>> + if (!memory_region_is_iommu(section->mr)) {
>>>> + return;
>>>> + }
>>>> + container = container_of(listener, VFIOContainer,
>>>> + iommu_data.type1.listener);
>>>> + page_mask = ~(memory_region_iommu_get_page_sizes(section->mr) - 1);
>>>> + vfio_listener_region_del(container, page_mask, listener, section);
>>>> +}
>>>> +
>>>> +static const MemoryListener vfio_spapr_iommu_listener = {
>>>> + .region_add = vfio_spapr_iommu_listener_region_add,
>>>> + .region_del = vfio_spapr_iommu_listener_region_del,
>>>> };
>>>
>>>
>>> Rather than creating all these wrappers, why don't we create a structure
>>> that gives us callbacks we can use for a shared function? If we had:
>>>
>>> struct VFIOMemoryListener {
>>> struct MemoryListener listener;
>>> bool (*filter)(MemoryRegionSection *section);
>>> hwaddr (page_size)(MemoryRegionSection *section);
>>> VFIOContainer *container;
>>> }
>>>
>>> Then there would be no reason for you to have separate wrappers for
>>> spapr. VFIOType1 would have a single VFIOMemoryListener, you could have
>>> an array of two.
>>
>> Sorry, I am missing the point here...
>>
>> I cannot just have an array of these - I will have to store an address
>> spaces per a listener in a container to implement filter() so I'd rather
>> store just an address space and not add filter() at all. And I will still
>
> You're not storing an address space now.
I am, indirectly, when register the listener with a filter which is never
NULL and which is passed to the listener. If not that, the whole thing
would be simpler. The rest is quite clear now, you are right.
>> need "memory: Add reporting of supported page sizes" - or there is no way
>
> Just like you do now.
>
>> to implement it all. So I still end up having 6 callbacks in
>> hw/vfio/common.c. What does this win for us?
>
> Nothing you're suggesting adds any more callbacks.
>
> The current filter is vfio_listener_skipped_section(). The iommu filter
> is simply:
>
> bool vfio_iommu_listener_skipped_section(mr)
> {
> return vfio_listener_skipped_section(mr) || !memory_region_is_iommu(mr);
> }
>
> Notice how I didn't have to call it spapr because it's probably what any
> guest iommu is going to do...
>
> The v2 spapr filter is:
>
> bool vfio_nodump_listener_skipped_section(mr)
> {
> return vfio_listener_skipped_section(mr) || memory_region_is_skip_dump(mr);
> }
>
> Notice again that we can use generic functions and not make everything
> spapr specific.
Ok, we add 2 (not 3) as vfio_listener_skipped_section() is still there.
You got +1.
> The page_size callbacks are similarly trivial, the iommu one obviously
> calls memory_region_iommu_get_page_sizes, the other one uses
> ~qemu_real_host_page_mask. Again, nothing spapr specific.
These are trivial but I need all 3 - for type1 iommu (real page size),
spapr iommu (TCE page size) and spapr memoryprereg (real page size).
Ok, I can reuse 1st for 3rd and get one less. +2.
> The container of course points back to the container and now
> vfio_listener_region_add() simply does:
>
> VFIOMemoryListener vlistener = container_of(listener, VFIOMemoryListener, listener);
> VFIOContainer = vlistener->container;
> hwaddr page_mask = vlistener->page_size(mr);
>
> if (vlistener->filter && vlistener->filter(mr)) {
> return
> }
> ...
>
> And suddenly we get to piece together a handful of _generic_ helper
> functions into a VFIOMemoryListener and we're not adding a new pair of
> wrappers for everything and duplicating trivial setup. Thanks,
Ok, we are saving 2 callbacks and make others smaller. Makes sense. Thanks
for chewing (right word here?) this stuff for me :)
>
> Alex
>
>>>>
>>>> static void vfio_listener_release(VFIOContainer *container)
>>>> @@ -684,7 +741,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>> goto free_container_exit;
>>>> }
>>>>
>>>> - container->iommu_data.type1.listener = vfio_memory_listener;
>>>> + container->iommu_data.type1.listener = vfio_type1_iommu_listener;
>>>> container->iommu_data.release = vfio_listener_release;
>>>>
>>>> memory_listener_register(&container->iommu_data.type1.listener,
>>>> @@ -724,7 +781,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>>>> goto free_container_exit;
>>>> }
>>>>
>>>> - container->iommu_data.type1.listener = vfio_memory_listener;
>>>> + container->iommu_data.type1.listener = vfio_spapr_iommu_listener;
>>>> container->iommu_data.release = vfio_listener_release;
>>>>
>>>> memory_listener_register(&container->iommu_data.type1.listener,
>>>
>>>
>>>
>>
>>
>
>
>
--
Alexey
next prev parent reply other threads:[~2015-07-16 3:31 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-14 12:21 [Qemu-devel] [RFC PATCH qemu v3 0/4] vfio: SPAPR IOMMU v2 (memory preregistration support) Alexey Kardashevskiy
2015-07-14 12:21 ` [Qemu-devel] [RFC PATCH qemu v3 1/4] memory: Add reporting of supported page sizes Alexey Kardashevskiy
2015-07-15 18:26 ` Alex Williamson
2015-07-16 1:12 ` Alexey Kardashevskiy
2015-07-14 12:21 ` [Qemu-devel] [RFC PATCH qemu v3 2/4] vfio: Use different page size for different IOMMU types Alexey Kardashevskiy
2015-07-15 18:26 ` Alex Williamson
2015-07-16 1:26 ` Alexey Kardashevskiy
2015-07-16 2:51 ` Alex Williamson
2015-07-16 3:31 ` Alexey Kardashevskiy [this message]
2015-07-14 12:21 ` [Qemu-devel] [RFC PATCH qemu v3 3/4] vfio: Store IOMMU type in container Alexey Kardashevskiy
2015-07-15 18:26 ` Alex Williamson
2015-07-14 12:21 ` [Qemu-devel] [RFC PATCH qemu v3 4/4] vfio: spapr: Add SPAPR IOMMU v2 support (DMA memory preregistering) Alexey Kardashevskiy
2015-07-16 5:11 ` David Gibson
2015-07-16 14:44 ` Alex Williamson
2015-07-17 5:20 ` David Gibson
2015-07-17 18:25 ` Alex Williamson
2015-07-18 15:05 ` David Gibson
2015-07-19 15:04 ` Alex Williamson
2015-07-17 7:13 ` Alexey Kardashevskiy
2015-07-17 13:39 ` David Gibson
2015-07-17 15:47 ` Alexey Kardashevskiy
2015-07-18 15:17 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55A72598.7010408@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=alex.williamson@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=mdroth@linux.vnet.ibm.com \
--cc=pbonzini@redhat.com \
--cc=peter.crosthwaite@xilinx.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.