From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: linuxppc-dev@lists.ozlabs.org
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>,
Michael Ellerman <michael@ellerman.id.au>,
Paul Mackerras <paulus@samba.org>,
Gavin Shan <gwshan@linux.vnet.ibm.com>
Subject: [PATCH v4 13/16] powerpc/powernv: Implement Dynamic DMA windows (DDW) for IODA
Date: Wed, 30 Jul 2014 19:31:32 +1000 [thread overview]
Message-ID: <1406712695-9491-14-git-send-email-aik@ozlabs.ru> (raw)
In-Reply-To: <1406712695-9491-1-git-send-email-aik@ozlabs.ru>
SPAPR defines an interface to create additional DMA windows dynamically.
"Dynamically" means that the window is not allocated at the guest start
and the guest can request it later. In practice, existing Linux guests
check for the capability and, if it is present, create and map one big
DMA window as big as the entire guest RAM.
SPAPR defines 4 RTAS calls for this feature which userspace implements.
This adds 4 callbacks into the spapr_tce_iommu_ops struct:
1. query - ibm,query-pe-dma-window - returns number/size of windows
which can be created (one, any page size);
2. create - ibm,create-pe-dma-window - creates a window;
3. remove - ibm,remove-pe-dma-window - removes a window; only the additional
window created by create() can be removed; the default 32-bit window cannot
be removed, as guests do not expect new windows to start from zero;
4. reset - ibm,reset-pe-dma-window - resets the DMA window configuration
to the default state; for now it only removes the additional window if one
was created.
The next patch will add the corresponding ioctls to the VFIO SPAPR TCE driver
to pass the RTAS calls from userspace to the IODA code.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/include/asm/tce.h | 21 ++++
arch/powerpc/platforms/powernv/pci-ioda.c | 158 +++++++++++++++++++++++++++++-
arch/powerpc/platforms/powernv/pci.h | 2 +
3 files changed, 180 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
index 5ee4987..583463b 100644
--- a/arch/powerpc/include/asm/tce.h
+++ b/arch/powerpc/include/asm/tce.h
@@ -60,6 +60,27 @@ struct spapr_tce_iommu_ops {
phys_addr_t addr);
void (*take_ownership)(struct spapr_tce_iommu_group *data,
bool enable);
+
+ /* Dynamic DMA window */
+ /* Page size flags for ibm,query-pe-dma-window */
+#define DDW_PGSIZE_4K 0x01
+#define DDW_PGSIZE_64K 0x02
+#define DDW_PGSIZE_16M 0x04
+#define DDW_PGSIZE_32M 0x08
+#define DDW_PGSIZE_64M 0x10
+#define DDW_PGSIZE_128M 0x20
+#define DDW_PGSIZE_256M 0x40
+#define DDW_PGSIZE_16G 0x80
+ long (*query)(struct spapr_tce_iommu_group *data,
+ __u32 *windows_available,
+ __u32 *page_size_mask);
+ long (*create)(struct spapr_tce_iommu_group *data,
+ __u32 page_shift,
+ __u32 window_shift,
+ struct iommu_table **ptbl);
+ long (*remove)(struct spapr_tce_iommu_group *data,
+ struct iommu_table *tbl);
+ long (*reset)(struct spapr_tce_iommu_group *data);
};
struct spapr_tce_iommu_group {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7482518..6a847b2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -754,6 +754,24 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
pnv_pci_ioda2_set_bypass(pe, true);
}
+/*
+ * Returns the TCE table of the PE owning @data which corresponds to @addr:
+ * - TCE_DEFAULT_WINDOW always selects the default 32bit table;
+ * - otherwise the 64bit table is tried first, then the 32bit one, using
+ *   pnv_pci_ioda_check_addr() (presumably a "does @addr belong to this
+ *   table's window" test - defined elsewhere, to be confirmed);
+ * - NULL is returned if neither table accepts @addr.
+ */
+static struct iommu_table *pnv_ioda2_iommu_get_table(
+		struct spapr_tce_iommu_group *data,
+		phys_addr_t addr)
+{
+	struct pnv_ioda_pe *pe = data->iommu_owner;
+	struct iommu_table *tbl32 = &pe->tce32.table;
+	struct iommu_table *tbl64 = &pe->tce64.table;
+
+	if (addr == TCE_DEFAULT_WINDOW)
+		return tbl32;
+
+	/* The additional window takes precedence over the default one */
+	if (pnv_pci_ioda_check_addr(tbl64, addr))
+		return tbl64;
+
+	return pnv_pci_ioda_check_addr(tbl32, addr) ? tbl32 : NULL;
+}
+
static void pnv_ioda2_take_ownership(struct spapr_tce_iommu_group *data,
bool enable)
{
@@ -762,9 +780,147 @@ static void pnv_ioda2_take_ownership(struct spapr_tce_iommu_group *data,
pnv_pci_ioda2_set_bypass(pe, !enable);
}
+/*
+ * Backend of the query() callback (ibm,query-pe-dma-window).
+ *
+ * Reports via @windows_available how many additional windows can still be
+ * created for the PE owning @data and via @page_size_mask which
+ * DDW_PGSIZE_xxx page sizes they may use. While the 64bit window is active,
+ * both are reported as zero; otherwise one window with 4K, 64K or 16M pages
+ * is offered. Always returns 0.
+ */
+static long pnv_pci_ioda2_ddw_query(struct spapr_tce_iommu_group *data,
+		__u32 *windows_available, __u32 *page_size_mask)
+{
+	struct pnv_ioda_pe *pe = data->iommu_owner;
+
+	if (pe->tce64_active) {
+		/* The only additional window is already in use */
+		*page_size_mask = 0;
+		*windows_available = 0;
+		return 0;
+	}
+
+	*page_size_mask = DDW_PGSIZE_4K | DDW_PGSIZE_64K | DDW_PGSIZE_16M;
+	*windows_available = 1;
+
+	return 0;
+}
+
+/*
+ * pnv_pci_ioda2_ddw_create() - create the additional 64bit DMA window
+ * @data: IOMMU group data; data->iommu_owner is the PE the window is for
+ * @page_shift: IOMMU page size as a shift; only 12, 16 and 24 are accepted
+ * @window_shift: window size as a shift; must not be below @page_shift
+ * @ptbl: on success receives the pointer to the window's iommu_table
+ *
+ * Allocates a zeroed TCE table (8 bytes per IOMMU page, 4K at least), maps
+ * it in OPAL as window number (pe_number << 1) + 1, sets up
+ * pe->tce64.table starting at pe->tce_bypass_base and marks the window as
+ * active.
+ *
+ * Returns 0 on success, -EINVAL if the shifts are not acceptable, -EBUSY if
+ * the window already exists, -ENOMEM if the TCE table cannot be allocated
+ * and -EFAULT if OPAL fails to map the window (the TCE table is released
+ * in that case).
+ */
+static long pnv_pci_ioda2_ddw_create(struct spapr_tce_iommu_group *data,
+		__u32 page_shift, __u32 window_shift,
+		struct iommu_table **ptbl)
+{
+	struct pnv_ioda_pe *pe = data->iommu_owner;
+	struct pnv_phb *phb = pe->phb;
+	struct iommu_table *tbl64 = &pe->tce64.table;
+	struct page *tce_mem;
+	unsigned long tce_table_size;
+	unsigned int order;
+	void *addr;
+	long ret;
+
+	if ((page_shift != 12) && (page_shift != 16) && (page_shift != 24))
+		return -EINVAL;
+
+	/*
+	 * Both shifts are supplied by the caller. A window smaller than one
+	 * IOMMU page or one which does not fit 64 bits would make the shifts
+	 * below undefined, so reject them before computing any size.
+	 */
+	if ((window_shift < page_shift) || (window_shift >= 64))
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): this compares a shift with a number of pages and so
+	 * it practically never fails. The intention probably was to limit
+	 * the window size (1ULL << window_shift) by memory_hotplug_max();
+	 * kept as is since changing it would reject requests which are
+	 * accepted today - to be confirmed.
+	 */
+	if (window_shift > (memory_hotplug_max() >> page_shift))
+		return -EINVAL;
+
+	if (pe->tce64_active)
+		return -EBUSY;
+
+	/* One 8 byte entry per IOMMU page, one 4K page at least */
+	tce_table_size = (1ULL << (window_shift - page_shift)) * 8;
+	tce_table_size = max(0x1000UL, tce_table_size);
+	order = get_order(tce_table_size);
+
+	pe_info(pe, "Setting up DDW at %llx..%llx ws=0x%x ps=0x%x table_size=0x%lx order=0x%x\n",
+			pe->tce_bypass_base,
+			pe->tce_bypass_base + (1ULL << window_shift) - 1,
+			window_shift, page_shift, tce_table_size, order);
+
+	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, order);
+	if (!tce_mem) {
+		pe_err(pe, " Failed to allocate a DDW\n");
+		return -ENOMEM;
+	}
+	addr = page_address(tce_mem);
+	memset(addr, 0, tce_table_size);
+
+	/* Configure HW */
+	ret = opal_pci_map_pe_dma_window(phb->opal_id,
+			pe->pe_number,
+			(pe->pe_number << 1) + 1, /* Window number */
+			1,
+			__pa(addr),
+			tce_table_size,
+			1ULL << page_shift);
+	if (ret) {
+		pe_err(pe, " Failed to configure DDW TCE table, err %ld\n",
+				ret);
+		/* Nothing references the table yet, do not leak it */
+		__free_pages(tce_mem, order);
+		return -EFAULT;
+	}
+
+	/* Setup linux iommu table */
+	pnv_pci_setup_iommu_table(tbl64, addr, tce_table_size,
+			pe->tce_bypass_base, page_shift);
+	pe->tce64.pe = pe;
+
+	/* Copy "invalidate" register address */
+	tbl64->it_index = pe->tce32.table.it_index;
+	tbl64->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
+			TCE_PCI_SWINV_PAIR;
+	tbl64->it_map = (void *) 0xDEADBEEF; /* poison */
+	tbl64->it_ops = pe->tce32.table.it_ops;
+
+	*ptbl = tbl64;
+
+	pe->tce64_active = true;
+
+	return 0;
+}
+
+/*
+ * Backend of the remove() callback (ibm,remove-pe-dma-window).
+ *
+ * Tears down the additional 64bit window of the PE owning @data. @tbl must
+ * be that PE's tce64 table and the window must be active, otherwise
+ * -EFAULT is returned. On the normal path the return value is whatever
+ * opal_pci_map_pe_dma_window() returned for the unmap request.
+ */
+static long pnv_pci_ioda2_ddw_remove(struct spapr_tce_iommu_group *data,
+ struct iommu_table *tbl)
+{
+ struct pnv_ioda_pe *pe = data->iommu_owner;
+ struct pnv_phb *phb = pe->phb;
+ long ret;
+
+ /* Only additional 64bit window removal is supported */
+ if ((tbl != &pe->tce64.table) || !pe->tce64_active)
+ return -EFAULT;
+
+ pe_info(pe, "Removing huge 64bit DMA window\n");
+
+ /*
+ * Walk the whole window (it_offset..it_offset + it_size); judging by
+ * its name the helper clears every TCE and releases the pages
+ * referenced by them - it is defined elsewhere, to be confirmed.
+ */
+ iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+
+ pe->tce64_active = false;
+
+ /* Same window number as used by create(); all zeroes request unmapping */
+ ret = opal_pci_map_pe_dma_window(phb->opal_id,
+ pe->pe_number,
+ (pe->pe_number << 1) + 1,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (ret)
+ pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
+
+ /*
+ * NOTE(review): the table is freed even if the unmapping failed, i.e.
+ * when the hardware may still point at it - confirm this is intended.
+ * The order assumes it_size counts 8 byte entries (hence << 3).
+ */
+ free_pages(tbl->it_base, get_order(tbl->it_size << 3));
+ memset(&pe->tce64, 0, sizeof(pe->tce64));
+
+ /*
+ * NOTE(review): this is the raw OPAL return value, not an errno like
+ * the -EFAULT above - confirm what the callers expect.
+ */
+ return ret;
+}
+
+/*
+ * Backend of the reset() callback (ibm,reset-pe-dma-window).
+ *
+ * Brings the DMA windows of the PE owning @data back to the default state,
+ * which means removing the additional 64bit window if it is active.
+ * Returns 0 if there is nothing to remove, otherwise the result of
+ * pnv_pci_ioda2_ddw_remove().
+ */
+static long pnv_pci_ioda2_ddw_reset(struct spapr_tce_iommu_group *data)
+{
+	struct pnv_ioda_pe *pe = data->iommu_owner;
+
+	pe_info(pe, "Reset DMA windows\n");
+
+	if (pe->tce64_active)
+		return pnv_pci_ioda2_ddw_remove(data, &pe->tce64.table);
+
+	/* Only the default window exists, nothing to do */
+	return 0;
+}
+
static struct spapr_tce_iommu_ops pnv_pci_ioda2_ops = {
- .get_table = pnv_ioda1_iommu_get_table,
+ /* IODA2 lookup which knows about both 32bit and 64bit tables */
+ .get_table = pnv_ioda2_iommu_get_table,
.take_ownership = pnv_ioda2_take_ownership,
+ /* Dynamic DMA window (DDW) callbacks */
+ .query = pnv_pci_ioda2_ddw_query,
+ .create = pnv_pci_ioda2_ddw_create,
+ .remove = pnv_pci_ioda2_ddw_remove,
+ .reset = pnv_pci_ioda2_ddw_reset
};
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index ab85743..f25f633 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -64,6 +64,8 @@ struct pnv_ioda_pe {
int tce32_segcount;
struct pnv_iommu_table tce32;
phys_addr_t tce_inval_reg_phys;
+ bool tce64_active;
+ struct pnv_iommu_table tce64;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
--
2.0.0
next prev parent reply other threads:[~2014-07-30 9:31 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-07-30 9:31 [PATCH v4 00/16] powernv: vfio: Add Dynamic DMA windows (DDW) Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 01/16] rcu: Define notrace version of list_for_each_entry_rcu and list_entry_rcu Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 02/16] KVM: PPC: Use RCU for arch.spapr_tce_tables Alexey Kardashevskiy
2014-08-21 5:25 ` Paul Mackerras
2014-07-30 9:31 ` [PATCH v4 03/16] mm: Add helpers for locked_vm Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 04/16] KVM: PPC: Account TCE-containing pages in locked_vm Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 05/16] powerpc/iommu: Fix comments with it_page_shift Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 06/16] powerpc/powernv: Make invalidate() a callback Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 07/16] powerpc/spapr: vfio: Implement spapr_tce_iommu_ops Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 08/16] powerpc/powernv: Convert/move set_bypass() callback to take_ownership() Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 09/16] powerpc/iommu: Fix IOMMU ownership control functions Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 10/16] powerpc: Move tce_xxx callbacks from ppc_md to iommu_table Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 11/16] powerpc/powernv: Release replaced TCE Alexey Kardashevskiy
2014-08-06 6:25 ` Benjamin Herrenschmidt
2014-08-06 6:27 ` Benjamin Herrenschmidt
2014-08-06 6:27 ` Benjamin Herrenschmidt
2014-07-30 9:31 ` [PATCH v4 12/16] powerpc/pseries/lpar: Enable VFIO Alexey Kardashevskiy
2014-07-30 9:31 ` Alexey Kardashevskiy [this message]
2014-07-30 9:31 ` [PATCH v4 14/16] vfio: powerpc/spapr: Reuse locked_vm accounting helpers Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 15/16] vfio: powerpc/spapr: Use it_page_size Alexey Kardashevskiy
2014-07-30 9:31 ` [PATCH v4 16/16] vfio: powerpc/spapr: Enable Dynamic DMA windows Alexey Kardashevskiy
2014-07-30 9:36 ` Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1406712695-9491-14-git-send-email-aik@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=gwshan@linux.vnet.ibm.com \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=michael@ellerman.id.au \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).