From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59868) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WnPwt-0005k2-PD for qemu-devel@nongnu.org; Thu, 22 May 2014 06:10:14 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1WnPwm-0006OY-Ob for qemu-devel@nongnu.org; Thu, 22 May 2014 06:10:07 -0400 Message-ID: <537DCCF6.1090108@suse.de> Date: Thu, 22 May 2014 12:09:58 +0200 From: Alexander Graf MIME-Version: 1.0 References: <1400682080-30724-1-git-send-email-aik@ozlabs.ru> <1400682080-30724-9-git-send-email-aik@ozlabs.ru> <537D2483.2000803@suse.de> <537D3A7F.3020604@ozlabs.ru> In-Reply-To: <537D3A7F.3020604@ozlabs.ru> Content-Type: text/plain; charset=KOI8-R; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Alexey Kardashevskiy , qemu-devel@nongnu.org Cc: qemu-ppc@nongnu.org On 22.05.14 01:45, Alexey Kardashevskiy wrote: > On 05/22/2014 08:11 AM, Alexander Graf wrote: >> On 21.05.14 16:21, Alexey Kardashevskiy wrote: >>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR >>> spec allows other page sizes and we are going to implement them, we need >>> page size to be configrable. >>> >>> This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT >>> with it whereever it is possible. >>> >>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used. >>> >>> Signed-off-by: Alexey Kardashevskiy >>> --- >>> hw/ppc/spapr_iommu.c | 54 >>> +++++++++++++++++++++++++++++--------------------- >>> hw/ppc/spapr_pci.c | 1 + >>> hw/ppc/spapr_vio.c | 1 + >>> include/hw/ppc/spapr.h | 3 ++- >>> 4 files changed, 35 insertions(+), 24 deletions(-) >>> >>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >>> index 90de3e3..e765a6d 100644 >>> --- a/hw/ppc/spapr_iommu.c >>> +++ b/hw/ppc/spapr_iommu.c >>> @@ -70,12 +70,13 @@ static IOMMUTLBEntry >>> spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) >>> if (tcet->bypass) { >>> ret.perm = IOMMU_RW; >>> - } else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) { >>> + } else if ((addr >> tcet->page_shift) < tcet->nb_table) { >>> /* Check if we are in bound */ >>> - tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT]; >>> - ret.iova = addr & ~SPAPR_TCE_PAGE_MASK; >>> - ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK; >>> - ret.addr_mask = SPAPR_TCE_PAGE_MASK; >>> + target_ulong mask = ~((1 << tcet->page_shift) - 1); >> Why target_ulong? This should be u64 or hwaddr or something along those >> lines, no? Also, can the mask grow bigger than 31bits? If so you probably >> want 1ULL (below as well). >> >> In fact, we might be better off with a few more fields to tcet. Just add >> page_mask and page_size in addition to the page_shift one and use them >> instead of calculating them over and over again. >> >>> + tce = tcet->table[addr >> tcet->page_shift]; >>> + ret.iova = addr & mask; >>> + ret.translated_addr = tce & mask; >>> + ret.addr_mask = ~mask; >>> ret.perm = tce; >>> } >>> trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm, >>> @@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev) >>> if (kvm_enabled()) { >>> tcet->table = kvmppc_create_spapr_tce(tcet->liobn, >>> tcet->nb_table << >>> - SPAPR_TCE_PAGE_SHIFT, >>> + tcet->page_shift, >>> &tcet->fd); >>> } >>> @@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState *dev) >>> } >>> sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, >>> + uint32_t page_shift, >>> uint32_t nb_table) >>> { >>> sPAPRTCETable *tcet; >>> @@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState >>> *owner, uint32_t liobn, >>> tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); >>> tcet->liobn = liobn; >>> + tcet->page_shift = page_shift; >>> tcet->nb_table = nb_table; >>> object_property_add_child(OBJECT(owner), "tce-table", >>> OBJECT(tcet), NULL); >>> @@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable >>> *tcet, target_ulong ioba, >>> target_ulong tce) >>> { >>> IOMMUTLBEntry entry; >>> + target_ulong mask = ~((1 << tcet->page_shift) - 1); >>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) { >>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) { >>> hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x" >>> TARGET_FMT_lx "\n", ioba); >>> return H_PARAMETER; >>> } >>> - tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce; >>> + tcet->table[ioba >> tcet->page_shift] = tce; >>> entry.target_as = &address_space_memory, >>> - entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK; >>> - entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK; >>> - entry.addr_mask = SPAPR_TCE_PAGE_MASK; >>> + entry.iova = ioba & mask; >>> + entry.translated_addr = tce & mask; >>> + entry.addr_mask = ~mask; >>> entry.perm = tce; >>> memory_region_notify_iommu(&tcet->iommu, entry); >>> @@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU >>> *cpu, >>> target_ulong ret = H_PARAMETER; >>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); >>> CPUState *cs = CPU(cpu); >>> + target_ulong mask; >>> if (!tcet) { >>> return H_PARAMETER; >>> @@ -235,12 +240,14 @@ static target_ulong h_put_tce_indirect(PowerPCCPU >>> *cpu, >>> return H_PARAMETER; >>> } >>> - ioba &= ~SPAPR_TCE_PAGE_MASK; >>> - tce_list &= ~SPAPR_TCE_PAGE_MASK; >>> + mask = ~((1 << tcet->page_shift) - 1); >>> + ioba &= mask; >>> + >>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) { >>> + target_ulong off = (tce_list & ~SPAPR_TCE_RW) + >>> + i * sizeof(target_ulong); >>> + target_ulong tce = ldq_phys(cs->as, off); >>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) { >>> - target_ulong tce = ldq_phys(cs->as, tce_list + >>> - i * sizeof(target_ulong)); >>> ret = put_tce_emu(tcet, ioba, tce); >>> if (ret) { >>> break; >>> @@ -276,9 +283,9 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, >>> sPAPREnvironment *spapr, >>> return H_PARAMETER; >>> } >>> - ioba &= ~SPAPR_TCE_PAGE_MASK; >>> + ioba &= ~((1 << tcet->page_shift) - 1); >>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) { >>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) { >>> ret = put_tce_emu(tcet, ioba, tce_value); >>> if (ret) { >>> break; >>> @@ -298,7 +305,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, >>> sPAPREnvironment *spapr, >>> target_ulong ret = H_PARAMETER; >>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); >>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1); >>> + ioba &= ~((1 << tcet->page_shift) - 1); >>> if (tcet) { >>> ret = put_tce_emu(tcet, ioba, tce); >>> @@ -311,13 +318,13 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, >>> sPAPREnvironment *spapr, >>> static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, >>> target_ulong *tce) >>> { >>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) { >>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) { >>> hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x" >>> TARGET_FMT_lx "\n", ioba); >>> return H_PARAMETER; >>> } >>> - *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT]; >>> + *tce = tcet->table[ioba >> tcet->page_shift]; >>> return H_SUCCESS; >>> } >>> @@ -330,8 +337,9 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, >>> sPAPREnvironment *spapr, >>> target_ulong tce = 0; >>> target_ulong ret = H_PARAMETER; >>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); >>> + const target_ulong mask = ~((1 << tcet->page_shift) - 1); >>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1); >>> + ioba &= mask; >>> if (tcet) { >>> ret = get_tce_emu(tcet, ioba, &tce); >>> @@ -382,7 +390,7 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const >>> char *propname, >>> } >>> return spapr_dma_dt(fdt, node_off, propname, >>> - tcet->liobn, 0, tcet->nb_table << >>> SPAPR_TCE_PAGE_SHIFT); >>> + tcet->liobn, 0, tcet->nb_table << >>> tcet->page_shift); >>> } >>> static void spapr_tce_table_class_init(ObjectClass *klass, void *data) >>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c >>> index fdd4c07..c9850d4 100644 >>> --- a/hw/ppc/spapr_pci.c >>> +++ b/hw/ppc/spapr_pci.c >>> @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState >>> *sphb, Error **errp) >>> sPAPRTCETable *tcet; >>> tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, >>> + SPAPR_TCE_PAGE_SHIFT, >>> 0x40000000 >> SPAPR_TCE_PAGE_SHIFT); >>> if (!tcet) { >>> error_setg(errp, "Unable to create TCE table for %s", >>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c >>> index b84e481..d7e9e6a 100644 >>> --- a/hw/ppc/spapr_vio.c >>> +++ b/hw/ppc/spapr_vio.c >>> @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) >>> if (pc->rtce_window_size) { >>> uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; >>> dev->tcet = spapr_tce_new_table(qdev, liobn, >>> + SPAPR_TCE_PAGE_SHIFT, >> I don't quite understand who defines what the TCE page size is for a given >> device. Can you try to explain this to me? > If it is default window (for PCI) or window for VIO - it is 4K. If it is a > dynamic DMA window - page size is a parameter of RTAS call which creates > the window. Could we change that default size for non-dynamic windows somehow? 4k is really fine grained. Since the KVM in-kernel TCE code really is just a dumb memory poker without checks, I guess we're fine on that side with dynamic page sizes. Alex