From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Alexander Graf <agraf@suse.de>, qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable
Date: Thu, 22 May 2014 20:24:54 +1000 [thread overview]
Message-ID: <537DD076.4030503@ozlabs.ru> (raw)
In-Reply-To: <537DCCF6.1090108@suse.de>
On 05/22/2014 08:09 PM, Alexander Graf wrote:
>
> On 22.05.14 01:45, Alexey Kardashevskiy wrote:
>> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
>>>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
>>>> spec allows other page sizes and we are going to implement them, we need
>>>> page size to be configrable.
>>>>
>>>> This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
>>>> with it whereever it is possible.
>>>>
>>>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>> hw/ppc/spapr_iommu.c | 54
>>>> +++++++++++++++++++++++++++++---------------------
>>>> hw/ppc/spapr_pci.c | 1 +
>>>> hw/ppc/spapr_vio.c | 1 +
>>>> include/hw/ppc/spapr.h | 3 ++-
>>>> 4 files changed, 35 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>>> index 90de3e3..e765a6d 100644
>>>> --- a/hw/ppc/spapr_iommu.c
>>>> +++ b/hw/ppc/spapr_iommu.c
>>>> @@ -70,12 +70,13 @@ static IOMMUTLBEntry
>>>> spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>>> if (tcet->bypass) {
>>>> ret.perm = IOMMU_RW;
>>>> - } else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
>>>> + } else if ((addr >> tcet->page_shift) < tcet->nb_table) {
>>>> /* Check if we are in bound */
>>>> - tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
>>>> - ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
>>>> - ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>>> - ret.addr_mask = SPAPR_TCE_PAGE_MASK;
>>>> + target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>> Why target_ulong? This should be u64 or hwaddr or something along those
>>> lines, no? Also, can the mask grow bigger than 31bits? If so you probably
>>> want 1ULL (below as well).
>>>
>>> In fact, we might be better off with a few more fields to tcet. Just add
>>> page_mask and page_size in addition to the page_shift one and use them
>>> instead of calculating them over and over again.
>>>
>>>> + tce = tcet->table[addr >> tcet->page_shift];
>>>> + ret.iova = addr & mask;
>>>> + ret.translated_addr = tce & mask;
>>>> + ret.addr_mask = ~mask;
>>>> ret.perm = tce;
>>>> }
>>>> trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
>>>> @@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>>> if (kvm_enabled()) {
>>>> tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
>>>> tcet->nb_table <<
>>>> - SPAPR_TCE_PAGE_SHIFT,
>>>> + tcet->page_shift,
>>>> &tcet->fd);
>>>> }
>>>> @@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState
>>>> *dev)
>>>> }
>>>> sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t
>>>> liobn,
>>>> + uint32_t page_shift,
>>>> uint32_t nb_table)
>>>> {
>>>> sPAPRTCETable *tcet;
>>>> @@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState
>>>> *owner, uint32_t liobn,
>>>> tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
>>>> tcet->liobn = liobn;
>>>> + tcet->page_shift = page_shift;
>>>> tcet->nb_table = nb_table;
>>>> object_property_add_child(OBJECT(owner), "tce-table",
>>>> OBJECT(tcet), NULL);
>>>> @@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable
>>>> *tcet, target_ulong ioba,
>>>> target_ulong tce)
>>>> {
>>>> IOMMUTLBEntry entry;
>>>> + target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>>> hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
>>>> TARGET_FMT_lx "\n", ioba);
>>>> return H_PARAMETER;
>>>> }
>>>> - tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
>>>> + tcet->table[ioba >> tcet->page_shift] = tce;
>>>> entry.target_as = &address_space_memory,
>>>> - entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
>>>> - entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>>> - entry.addr_mask = SPAPR_TCE_PAGE_MASK;
>>>> + entry.iova = ioba & mask;
>>>> + entry.translated_addr = tce & mask;
>>>> + entry.addr_mask = ~mask;
>>>> entry.perm = tce;
>>>> memory_region_notify_iommu(&tcet->iommu, entry);
>>>> @@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>>> *cpu,
>>>> target_ulong ret = H_PARAMETER;
>>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>>> CPUState *cs = CPU(cpu);
>>>> + target_ulong mask;
>>>> if (!tcet) {
>>>> return H_PARAMETER;
>>>> @@ -235,12 +240,14 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>>> *cpu,
>>>> return H_PARAMETER;
>>>> }
>>>> - ioba &= ~SPAPR_TCE_PAGE_MASK;
>>>> - tce_list &= ~SPAPR_TCE_PAGE_MASK;
>>>> + mask = ~((1 << tcet->page_shift) - 1);
>>>> + ioba &= mask;
>>>> +
>>>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>>> + target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
>>>> + i * sizeof(target_ulong);
>>>> + target_ulong tce = ldq_phys(cs->as, off);
>>>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>>> - target_ulong tce = ldq_phys(cs->as, tce_list +
>>>> - i * sizeof(target_ulong));
>>>> ret = put_tce_emu(tcet, ioba, tce);
>>>> if (ret) {
>>>> break;
>>>> @@ -276,9 +283,9 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
>>>> sPAPREnvironment *spapr,
>>>> return H_PARAMETER;
>>>> }
>>>> - ioba &= ~SPAPR_TCE_PAGE_MASK;
>>>> + ioba &= ~((1 << tcet->page_shift) - 1);
>>>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>>> ret = put_tce_emu(tcet, ioba, tce_value);
>>>> if (ret) {
>>>> break;
>>>> @@ -298,7 +305,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>>> sPAPREnvironment *spapr,
>>>> target_ulong ret = H_PARAMETER;
>>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>>> + ioba &= ~((1 << tcet->page_shift) - 1);
>>>> if (tcet) {
>>>> ret = put_tce_emu(tcet, ioba, tce);
>>>> @@ -311,13 +318,13 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>>> sPAPREnvironment *spapr,
>>>> static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>>> target_ulong *tce)
>>>> {
>>>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>>> hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
>>>> TARGET_FMT_lx "\n", ioba);
>>>> return H_PARAMETER;
>>>> }
>>>> - *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT];
>>>> + *tce = tcet->table[ioba >> tcet->page_shift];
>>>> return H_SUCCESS;
>>>> }
>>>> @@ -330,8 +337,9 @@ static target_ulong h_get_tce(PowerPCCPU *cpu,
>>>> sPAPREnvironment *spapr,
>>>> target_ulong tce = 0;
>>>> target_ulong ret = H_PARAMETER;
>>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>>> + const target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>>> + ioba &= mask;
>>>> if (tcet) {
>>>> ret = get_tce_emu(tcet, ioba, &tce);
>>>> @@ -382,7 +390,7 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const
>>>> char *propname,
>>>> }
>>>> return spapr_dma_dt(fdt, node_off, propname,
>>>> - tcet->liobn, 0, tcet->nb_table <<
>>>> SPAPR_TCE_PAGE_SHIFT);
>>>> + tcet->liobn, 0, tcet->nb_table <<
>>>> tcet->page_shift);
>>>> }
>>>> static void spapr_tce_table_class_init(ObjectClass *klass, void
>>>> *data)
>>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>>> index fdd4c07..c9850d4 100644
>>>> --- a/hw/ppc/spapr_pci.c
>>>> +++ b/hw/ppc/spapr_pci.c
>>>> @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
>>>> *sphb, Error **errp)
>>>> sPAPRTCETable *tcet;
>>>> tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
>>>> + SPAPR_TCE_PAGE_SHIFT,
>>>> 0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
>>>> if (!tcet) {
>>>> error_setg(errp, "Unable to create TCE table for %s",
>>>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>>>> index b84e481..d7e9e6a 100644
>>>> --- a/hw/ppc/spapr_vio.c
>>>> +++ b/hw/ppc/spapr_vio.c
>>>> @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
>>>> if (pc->rtce_window_size) {
>>>> uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
>>>> dev->tcet = spapr_tce_new_table(qdev, liobn,
>>>> + SPAPR_TCE_PAGE_SHIFT,
>>> I don't quite understand who defines what the TCE page size is for a given
>>> device. Can you try to explain this to me?
>> If it is default window (for PCI) or window for VIO - it is 4K. If it is a
>> dynamic DMA window - page size is a parameter of RTAS call which creates
>> the window.
>
> Could we change that default size for non-dynamic windows somehow? 4k is
> really fine grained.
No, this is hardcoded in a million places and old distros. Normally it is
maximum 1GB or 2MB table, not too bad. And with DDW support, we can make
the default one really small (64MB?) and lose even less.
SPAPR:
R1–7.3.31–4. For the Dynamic DMA Windows (DDW) option: The platform must
provide a default DMA window
for each PE, and all of the following must be true:
a. The window is defined by the “ibm,dma-window” property in the OF device
tree.
b. The window is defined with 4 KB I/O pages.
c. The window is located entirely below 4 GB.
>
> Since the KVM in-kernel TCE code really is just a dumb memory poker without
> checks, I guess we're fine on that side with dynamic page sizes.
Yep.
--
Alexey
next prev parent reply other threads:[~2014-05-22 10:25 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-21 14:21 [Qemu-devel] [PATCH 0/9] spapr_pci: Prepare for VFIO Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 1/9] spapr: Enable dynamic change of the supported hypercalls list Alexey Kardashevskiy
2014-05-21 14:26 ` Alexander Graf
2014-05-21 15:21 ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-22 10:47 ` Alexander Graf
2014-05-22 11:01 ` Alexey Kardashevskiy
2014-05-22 11:02 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 2/9] spapr_iommu: Enable multiple TCE requests Alexey Kardashevskiy
2014-05-21 14:37 ` Alexander Graf
2014-05-21 15:23 ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-21 16:03 ` Alexey Kardashevskiy
2014-05-21 21:54 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 3/9] spapr_pci: Introduce a finish_realize() callback Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 4/9] spapr_pci: spapr_iommu: Make DMA window a subregion Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 5/9] spapr_pci: Allow multiple TCE tables per PHB Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 6/9] spapr_iommu: Convert old qdev_init_nofail() to object_property_set_bool Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 7/9] spapr_iommu: Get rid of window_size in sPAPRTCETable Alexey Kardashevskiy
2014-05-21 22:05 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift " Alexey Kardashevskiy
2014-05-21 22:11 ` Alexander Graf
2014-05-21 23:45 ` Alexey Kardashevskiy
2014-05-22 10:09 ` Alexander Graf
2014-05-22 10:24 ` Alexey Kardashevskiy [this message]
2014-05-22 10:45 ` Alexander Graf
2014-05-22 10:46 ` Alexey Kardashevskiy
2014-05-22 10:48 ` Alexander Graf
2014-05-22 10:55 ` Alexey Kardashevskiy
2014-05-22 4:25 ` Alexey Kardashevskiy
2014-05-22 7:11 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 9/9] spapr_iommu: Introduce bus_offset " Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=537DD076.4030503@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=agraf@suse.de \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).