From: Alexander Graf <agraf@suse.de>
To: Alexey Kardashevskiy <aik@ozlabs.ru>, qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift in sPAPRTCETable
Date: Thu, 22 May 2014 12:09:58 +0200 [thread overview]
Message-ID: <537DCCF6.1090108@suse.de> (raw)
In-Reply-To: <537D3A7F.3020604@ozlabs.ru>
On 22.05.14 01:45, Alexey Kardashevskiy wrote:
> On 05/22/2014 08:11 AM, Alexander Graf wrote:
>> On 21.05.14 16:21, Alexey Kardashevskiy wrote:
>>> At the moment only 4K pages are supported by sPAPRTCETable. Since sPAPR
>>> spec allows other page sizes and we are going to implement them, we need
>>> page size to be configrable.
>>>
>>> This adds @page_shift into sPAPRTCETable and replaces SPAPR_TCE_PAGE_SHIFT
>>> with it whereever it is possible.
>>>
>>> This removes SPAPR_TCE_PAGE_MASK as it is no longer used.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>> hw/ppc/spapr_iommu.c | 54
>>> +++++++++++++++++++++++++++++---------------------
>>> hw/ppc/spapr_pci.c | 1 +
>>> hw/ppc/spapr_vio.c | 1 +
>>> include/hw/ppc/spapr.h | 3 ++-
>>> 4 files changed, 35 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>> index 90de3e3..e765a6d 100644
>>> --- a/hw/ppc/spapr_iommu.c
>>> +++ b/hw/ppc/spapr_iommu.c
>>> @@ -70,12 +70,13 @@ static IOMMUTLBEntry
>>> spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>> if (tcet->bypass) {
>>> ret.perm = IOMMU_RW;
>>> - } else if ((addr >> SPAPR_TCE_PAGE_SHIFT) < tcet->nb_table) {
>>> + } else if ((addr >> tcet->page_shift) < tcet->nb_table) {
>>> /* Check if we are in bound */
>>> - tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
>>> - ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
>>> - ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>> - ret.addr_mask = SPAPR_TCE_PAGE_MASK;
>>> + target_ulong mask = ~((1 << tcet->page_shift) - 1);
>> Why target_ulong? This should be u64 or hwaddr or something along those
>> lines, no? Also, can the mask grow bigger than 31bits? If so you probably
>> want 1ULL (below as well).
>>
>> In fact, we might be better off with a few more fields to tcet. Just add
>> page_mask and page_size in addition to the page_shift one and use them
>> instead of calculating them over and over again.
>>
>>> + tce = tcet->table[addr >> tcet->page_shift];
>>> + ret.iova = addr & mask;
>>> + ret.translated_addr = tce & mask;
>>> + ret.addr_mask = ~mask;
>>> ret.perm = tce;
>>> }
>>> trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
>>> @@ -113,7 +114,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>> if (kvm_enabled()) {
>>> tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
>>> tcet->nb_table <<
>>> - SPAPR_TCE_PAGE_SHIFT,
>>> + tcet->page_shift,
>>> &tcet->fd);
>>> }
>>> @@ -133,6 +134,7 @@ static int spapr_tce_table_realize(DeviceState *dev)
>>> }
>>> sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
>>> + uint32_t page_shift,
>>> uint32_t nb_table)
>>> {
>>> sPAPRTCETable *tcet;
>>> @@ -149,6 +151,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState
>>> *owner, uint32_t liobn,
>>> tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
>>> tcet->liobn = liobn;
>>> + tcet->page_shift = page_shift;
>>> tcet->nb_table = nb_table;
>>> object_property_add_child(OBJECT(owner), "tce-table",
>>> OBJECT(tcet), NULL);
>>> @@ -194,19 +197,20 @@ static target_ulong put_tce_emu(sPAPRTCETable
>>> *tcet, target_ulong ioba,
>>> target_ulong tce)
>>> {
>>> IOMMUTLBEntry entry;
>>> + target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>> hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
>>> TARGET_FMT_lx "\n", ioba);
>>> return H_PARAMETER;
>>> }
>>> - tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
>>> + tcet->table[ioba >> tcet->page_shift] = tce;
>>> entry.target_as = &address_space_memory,
>>> - entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
>>> - entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
>>> - entry.addr_mask = SPAPR_TCE_PAGE_MASK;
>>> + entry.iova = ioba & mask;
>>> + entry.translated_addr = tce & mask;
>>> + entry.addr_mask = ~mask;
>>> entry.perm = tce;
>>> memory_region_notify_iommu(&tcet->iommu, entry);
>>> @@ -226,6 +230,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>> *cpu,
>>> target_ulong ret = H_PARAMETER;
>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>> CPUState *cs = CPU(cpu);
>>> + target_ulong mask;
>>> if (!tcet) {
>>> return H_PARAMETER;
>>> @@ -235,12 +240,14 @@ static target_ulong h_put_tce_indirect(PowerPCCPU
>>> *cpu,
>>> return H_PARAMETER;
>>> }
>>> - ioba &= ~SPAPR_TCE_PAGE_MASK;
>>> - tce_list &= ~SPAPR_TCE_PAGE_MASK;
>>> + mask = ~((1 << tcet->page_shift) - 1);
>>> + ioba &= mask;
>>> +
>>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>> + target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
>>> + i * sizeof(target_ulong);
>>> + target_ulong tce = ldq_phys(cs->as, off);
>>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>> - target_ulong tce = ldq_phys(cs->as, tce_list +
>>> - i * sizeof(target_ulong));
>>> ret = put_tce_emu(tcet, ioba, tce);
>>> if (ret) {
>>> break;
>>> @@ -276,9 +283,9 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>> return H_PARAMETER;
>>> }
>>> - ioba &= ~SPAPR_TCE_PAGE_MASK;
>>> + ioba &= ~((1 << tcet->page_shift) - 1);
>>> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
>>> + for (i = 0; i < npages; ++i, ioba += (1 << tcet->page_shift)) {
>>> ret = put_tce_emu(tcet, ioba, tce_value);
>>> if (ret) {
>>> break;
>>> @@ -298,7 +305,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>> target_ulong ret = H_PARAMETER;
>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>> + ioba &= ~((1 << tcet->page_shift) - 1);
>>> if (tcet) {
>>> ret = put_tce_emu(tcet, ioba, tce);
>>> @@ -311,13 +318,13 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>> static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>> target_ulong *tce)
>>> {
>>> - if ((ioba >> SPAPR_TCE_PAGE_SHIFT) >= tcet->nb_table) {
>>> + if ((ioba >> tcet->page_shift) >= tcet->nb_table) {
>>> hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
>>> TARGET_FMT_lx "\n", ioba);
>>> return H_PARAMETER;
>>> }
>>> - *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT];
>>> + *tce = tcet->table[ioba >> tcet->page_shift];
>>> return H_SUCCESS;
>>> }
>>> @@ -330,8 +337,9 @@ static target_ulong h_get_tce(PowerPCCPU *cpu,
>>> sPAPREnvironment *spapr,
>>> target_ulong tce = 0;
>>> target_ulong ret = H_PARAMETER;
>>> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
>>> + const target_ulong mask = ~((1 << tcet->page_shift) - 1);
>>> - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>> + ioba &= mask;
>>> if (tcet) {
>>> ret = get_tce_emu(tcet, ioba, &tce);
>>> @@ -382,7 +390,7 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const
>>> char *propname,
>>> }
>>> return spapr_dma_dt(fdt, node_off, propname,
>>> - tcet->liobn, 0, tcet->nb_table <<
>>> SPAPR_TCE_PAGE_SHIFT);
>>> + tcet->liobn, 0, tcet->nb_table <<
>>> tcet->page_shift);
>>> }
>>> static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index fdd4c07..c9850d4 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -656,6 +656,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState
>>> *sphb, Error **errp)
>>> sPAPRTCETable *tcet;
>>> tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
>>> + SPAPR_TCE_PAGE_SHIFT,
>>> 0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
>>> if (!tcet) {
>>> error_setg(errp, "Unable to create TCE table for %s",
>>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>>> index b84e481..d7e9e6a 100644
>>> --- a/hw/ppc/spapr_vio.c
>>> +++ b/hw/ppc/spapr_vio.c
>>> @@ -457,6 +457,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
>>> if (pc->rtce_window_size) {
>>> uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
>>> dev->tcet = spapr_tce_new_table(qdev, liobn,
>>> + SPAPR_TCE_PAGE_SHIFT,
>> I don't quite understand who defines what the TCE page size is for a given
>> device. Can you try to explain this to me?
> If it is default window (for PCI) or window for VIO - it is 4K. If it is a
> dynamic DMA window - page size is a parameter of RTAS call which creates
> the window.
Could we change that default size for non-dynamic windows somehow? 4k is
really fine grained.
Since the KVM in-kernel TCE code really is just a dumb memory poker
without checks, I guess we're fine on that side with dynamic page sizes.
Alex
next prev parent reply other threads:[~2014-05-22 10:10 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-21 14:21 [Qemu-devel] [PATCH 0/9] spapr_pci: Prepare for VFIO Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 1/9] spapr: Enable dynamic change of the supported hypercalls list Alexey Kardashevskiy
2014-05-21 14:26 ` Alexander Graf
2014-05-21 15:21 ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-22 10:47 ` Alexander Graf
2014-05-22 11:01 ` Alexey Kardashevskiy
2014-05-22 11:02 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 2/9] spapr_iommu: Enable multiple TCE requests Alexey Kardashevskiy
2014-05-21 14:37 ` Alexander Graf
2014-05-21 15:23 ` [Qemu-devel] [PATCH v2] " Alexey Kardashevskiy
2014-05-21 16:03 ` Alexey Kardashevskiy
2014-05-21 21:54 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 3/9] spapr_pci: Introduce a finish_realize() callback Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 4/9] spapr_pci: spapr_iommu: Make DMA window a subregion Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 5/9] spapr_pci: Allow multiple TCE tables per PHB Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 6/9] spapr_iommu: Convert old qdev_init_nofail() to object_property_set_bool Alexey Kardashevskiy
2014-05-21 14:21 ` [Qemu-devel] [PATCH 7/9] spapr_iommu: Get rid of window_size in sPAPRTCETable Alexey Kardashevskiy
2014-05-21 22:05 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 8/9] spapr_iommu: Introduce page_shift " Alexey Kardashevskiy
2014-05-21 22:11 ` Alexander Graf
2014-05-21 23:45 ` Alexey Kardashevskiy
2014-05-22 10:09 ` Alexander Graf [this message]
2014-05-22 10:24 ` Alexey Kardashevskiy
2014-05-22 10:45 ` Alexander Graf
2014-05-22 10:46 ` Alexey Kardashevskiy
2014-05-22 10:48 ` Alexander Graf
2014-05-22 10:55 ` Alexey Kardashevskiy
2014-05-22 4:25 ` Alexey Kardashevskiy
2014-05-22 7:11 ` Alexander Graf
2014-05-21 14:21 ` [Qemu-devel] [PATCH 9/9] spapr_iommu: Introduce bus_offset " Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=537DCCF6.1090108@suse.de \
--to=agraf@suse.de \
--cc=aik@ozlabs.ru \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).