qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: Alexey Kardashevskiy <aik@ozlabs.ru>, qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable
Date: Thu, 22 May 2014 12:09:58 +0200	[thread overview]
Message-ID: <537DCCF6.1090108@suse.de> (raw)
In-Reply-To: <537D3A7F.3020604@ozlabs.ru>


On 22.05.14 01:45, Alexey Kardashevskiy wrote:
> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
>>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
>>> spec allows other page sizes and we are going to implement them, we need
>>> page size to be configurable.
>>>
>>> This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
>>> with it wherever it is possible.
>>>
>>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>    hw/ppc/spapr_iommu.c   | 54
>>> +++++++++++++++++++++++++++++---------------------
>>>    hw/ppc/spapr_pci.c     |  1 +
>>>    hw/ppc/spapr_vio.c     |  1 +
>>>    include/hw/ppc/spapr.h |  3 ++-
>>>    4 files changed, 35 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>> index 90de3e3..e765a6d 100644
>>> --- a/hw/ppc/spapr_iommu.c
>>> +++ b/hw/ppc/spapr_iommu.c
>>> @@ -70,12 +70,13 @@ static IOMMUTLBEntry
>>> spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>>          if (tcet->bypass) {
>>>            ret.perm = IOMMU_RW;
>>> -    } else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
>>> +    } else if ((addr >> tcet->page_shift) < tcet->nb_table) {
>>>            /* Check if we are in bound */
>>> -        tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
>>> -        ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
>>> -        ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>> -        ret.addr_mask = SPAPR_TCE_PAGE_MASK;
>>> +        target_ulong mask = ~((1 << tcet->page_shift) - 1);
>> Why target_ulong? This should be u64 or hwaddr or something along those
>> lines, no? Also, can the mask grow bigger than 31 bits? If so, you probably
>> want 1ULL (below as well).
>>
>> In fact, we might be better off with a few more fields to tcet. Just add
>> page_mask and page_size in addition to the page_shift one and use them
>> instead of calculating them over and over again.
>>
>>> +        tce = tcet->table[addr >> tcet->page_shift];
>>> +        ret.iova = addr & mask;
>>> +        ret.translated_addr = tce & mask;
>>> +        ret.addr_mask = ~mask;
>>>            ret.perm = tce;
>>>        }
>>>        trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
>>> @@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>>        if (kvm_enabled()) {
>>>            tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
>>>                                                  tcet->nb_table <<
>>> -                                              SPAPR_TCE_PAGE_SHIFT,
>>> +                                              tcet->page_shift,
>>>                                                  &tcet->fd);
>>>        }
>>>    @@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>>    }
>>>      sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
>>> +                                   uint32_t page_shift,
>>>                                       uint32_t nb_table)
>>>    {
>>>        sPAPRTCETable *tcet;
>>> @@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState
>>> *owner, uint32_t liobn,
>>>          tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
>>>        tcet->liobn = liobn;
>>> +    tcet->page_shift = page_shift;
>>>        tcet->nb_table = nb_table;
>>>          object_property_add_child(OBJECT(owner), "tce-table",
>>> OBJECT(tcet), NULL);
>>> @@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable
>>> *tcet, target_ulong ioba,
>>>                                    target_ulong tce)
>>>    {
>>>        IOMMUTLBEntry entry;
>>> +    target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>>    -    if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>> +    if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>>            hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
>>>                          TARGET_FMT_lx "\n", ioba);
>>>            return H_PARAMETER;
>>>        }
>>>    -    tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
>>> +    tcet->table[ioba >> tcet->page_shift] = tce;
>>>          entry.target_as = &address_space_memory,
>>> -    entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
>>> -    entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>> -    entry.addr_mask = SPAPR_TCE_PAGE_MASK;
>>> +    entry.iova = ioba & mask;
>>> +    entry.translated_addr = tce & mask;
>>> +    entry.addr_mask = ~mask;
>>>        entry.perm = tce;
>>>        memory_region_notify_iommu(&tcet->iommu, entry);
>>>    @@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>> *cpu,
>>>        target_ulong ret = H_PARAMETER;
>>>        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>>        CPUState *cs = CPU(cpu);
>>> +    target_ulong mask;
>>>          if (!tcet) {
>>>            return H_PARAMETER;
>>> @@ -235,12 +240,14 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>> *cpu,
>>>            return H_PARAMETER;
>>>        }
>>>    -    ioba &= ~SPAPR_TCE_PAGE_MASK;
>>> -    tce_list &= ~SPAPR_TCE_PAGE_MASK;
>>> +    mask = ~((1 << tcet->page_shift) - 1);
>>> +    ioba &= mask;
>>> +
>>> +    for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>> +        target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
>>> +                                i * sizeof(target_ulong);
>>> +        target_ulong tce = ldq_phys(cs->as, off);
>>>    -    for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>> -        target_ulong tce = ldq_phys(cs->as, tce_list +
>>> -                                    i * sizeof(target_ulong));
>>>            ret = put_tce_emu(tcet, ioba, tce);
>>>            if (ret) {
>>>                break;
>>> @@ -276,9 +283,9 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>>            return H_PARAMETER;
>>>        }
>>>    -    ioba &= ~SPAPR_TCE_PAGE_MASK;
>>> +    ioba &= ~((1 << tcet->page_shift) - 1);
>>>    -    for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>> +    for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>>            ret = put_tce_emu(tcet, ioba, tce_value);
>>>            if (ret) {
>>>                break;
>>> @@ -298,7 +305,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>>        target_ulong ret = H_PARAMETER;
>>>        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>>    -    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>> +    ioba &= ~((1 << tcet->page_shift) - 1);
>>>          if (tcet) {
>>>            ret = put_tce_emu(tcet, ioba, tce);
>>> @@ -311,13 +318,13 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>>    static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>>                                    target_ulong *tce)
>>>    {
>>> -    if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>> +    if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>>            hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
>>>                          TARGET_FMT_lx "\n", ioba);
>>>            return H_PARAMETER;
>>>        }
>>>    -    *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT];
>>> +    *tce = tcet->table[ioba >> tcet->page_shift];
>>>          return H_SUCCESS;
>>>    }
>>> @@ -330,8 +337,9 @@ static target_ulong h_get_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>>        target_ulong tce = 0;
>>>        target_ulong ret = H_PARAMETER;
>>>        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>> +    const target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>>    -    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>> +    ioba &= mask;
>>>          if (tcet) {
>>>            ret = get_tce_emu(tcet, ioba, &tce);
>>> @@ -382,7 +390,7 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const
>>> char *propname,
>>>        }
>>>          return spapr_dma_dt(fdt, node_off, propname,
>>> -                        tcet->liobn, 0, tcet->nb_table <<
>>> SPAPR_TCE_PAGE_SHIFT);
>>> +                        tcet->liobn, 0, tcet->nb_table <<
>>> tcet->page_shift);
>>>    }
>>>      static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index fdd4c07..c9850d4 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
>>> *sphb, Error **errp)
>>>        sPAPRTCETable *tcet;
>>>          tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
>>> +                               SPAPR_TCE_PAGE_SHIFT,
>>>                                   0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
>>>        if (!tcet) {
>>>            error_setg(errp, "Unable to create TCE table for %s",
>>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>>> index b84e481..d7e9e6a 100644
>>> --- a/hw/ppc/spapr_vio.c
>>> +++ b/hw/ppc/spapr_vio.c
>>> @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
>>>        if (pc->rtce_window_size) {
>>>            uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
>>>            dev->tcet = spapr_tce_new_table(qdev, liobn,
>>> +                                        SPAPR_TCE_PAGE_SHIFT,
>> I don't quite understand who defines what the TCE page size is for a given
>> device. Can you try to explain this to me?
> If it is the default window (for PCI) or a window for VIO, it is 4K. If it
> is a dynamic DMA window, the page size is a parameter of the RTAS call
> which creates the window.

Could we change that default size for non-dynamic windows somehow? 4k is 
really fine-grained.

Since the KVM in-kernel TCE code really is just a dumb memory poker 
without checks, I guess we're fine on that side with dynamic page sizes.


Alex

  reply	other threads:[~2014-05-22 10:10 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-21 14:21 [Qemu-devel] [PATCH 0/9] spapr_pci: Prepare for VFIO Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 1/9] spapr: Enable dynamic change of the supported hypercalls list Alexey Kardashevskiy
2014-05-21 14:26   ` Alexander Graf
2014-05-21 15:21     ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-22 10:47       ` Alexander Graf
2014-05-22 11:01         ` Alexey Kardashevskiy
2014-05-22 11:02           ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 2/9] spapr_iommu: Enable multiple TCE requests Alexey Kardashevskiy
2014-05-21 14:37   ` Alexander Graf
2014-05-21 15:23     ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-21 16:03       ` Alexey Kardashevskiy
2014-05-21 21:54         ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 3/9] spapr_pci: Introduce a finish_realize() callback Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 4/9] spapr_pci: spapr_iommu: Make DMA window a subregion Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 5/9] spapr_pci: Allow multiple TCE tables per PHB Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 6/9] spapr_iommu: Convert old qdev_init_nofail() to object_property_set_bool Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 7/9] spapr_iommu: Get rid of window_size in sPAPRTCETable Alexey Kardashevskiy
2014-05-21 22:05   ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift " Alexey Kardashevskiy
2014-05-21 22:11   ` Alexander Graf
2014-05-21 23:45     ` Alexey Kardashevskiy
2014-05-22 10:09       ` Alexander Graf [this message]
2014-05-22 10:24         ` Alexey Kardashevskiy
2014-05-22 10:45           ` Alexander Graf
2014-05-22 10:46             ` Alexey Kardashevskiy
2014-05-22 10:48               ` Alexander Graf
2014-05-22 10:55                 ` Alexey Kardashevskiy
2014-05-22  4:25     ` Alexey Kardashevskiy
2014-05-22  7:11       ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 9/9] spapr_iommu: Introduce bus_offset " Alexey Kardashevskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=537DCCF6.1090108@suse.de \
    --to=agraf@suse.de \
    --cc=aik@ozlabs.ru \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).