linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 11/38] powerpc/dart: Use a cachable DART
Date: Mon, 27 Jun 2016 21:29:09 +1000	[thread overview]
Message-ID: <1467026976-7974-12-git-send-email-benh@kernel.crashing.org> (raw)
In-Reply-To: <1467026976-7974-1-git-send-email-benh@kernel.crashing.org>

Instead of punching a hole in the linear mapping, just use normal
cachable memory, and apply the flush sequence documented in the
CPC925 (aka U3) user manual.

This allows us to remove quite a bit of code related to the early
allocation of the DART and the hole in the linear mapping. We can
also get rid of the copy of the DART for suspend/resume as the
original memory can just be saved/restored now, as long as we
properly sync the caches.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/iommu.h        |   1 -
 arch/powerpc/mm/hash_utils_64.c         |  32 ------
 arch/powerpc/platforms/maple/setup.c    |   7 --
 arch/powerpc/platforms/powermac/setup.c |   8 --
 arch/powerpc/sysdev/dart_iommu.c        | 180 +++++++++++++++-----------------
 5 files changed, 84 insertions(+), 144 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7b87bab..f49a72a 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void);
 extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
 extern void iommu_init_early_pasemi(void);
 
-extern void alloc_dart_table(void);
 #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
 static inline void iommu_save(void)
 {
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b2740c6..2bea864 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -87,10 +87,6 @@
  *
  */
 
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
 static unsigned long _SDR1;
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 EXPORT_SYMBOL_GPL(mmu_psize_defs);
@@ -828,34 +824,6 @@ static void __init htab_initialize(void)
 		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
 		    base, size, prot);
 
-#ifdef CONFIG_U3_DART
-		/* Do not map the DART space. Fortunately, it will be aligned
-		 * in such a way that it will not cross two memblock regions and
-		 * will fit within a single 16Mb page.
-		 * The DART space is assumed to be a full 16Mb region even if
-		 * we only use 2Mb of that space. We will use more of it later
-		 * for AGP GART. We have to use a full 16Mb large page.
-		 */
-		DBG("DART base: %lx\n", dart_tablebase);
-
-		if (dart_tablebase != 0 && dart_tablebase >= base
-		    && dart_tablebase < (base + size)) {
-			unsigned long dart_table_end = dart_tablebase + 16 * MB;
-			if (base != dart_tablebase)
-				BUG_ON(htab_bolt_mapping(base, dart_tablebase,
-							__pa(base), prot,
-							mmu_linear_psize,
-							mmu_kernel_ssize));
-			if ((base + size) > dart_table_end)
-				BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
-							base + size,
-							__pa(dart_table_end),
-							 prot,
-							 mmu_linear_psize,
-							 mmu_kernel_ssize));
-			continue;
-		}
-#endif /* CONFIG_U3_DART */
 		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
 				prot, mmu_linear_psize, mmu_kernel_ssize));
 	}
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index a837188..3cd625d 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -303,13 +303,6 @@ static int __init maple_probe(void)
 	if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
 	    !of_flat_dt_is_compatible(root, "Momentum,Apache"))
 		return 0;
-	/*
-	 * On U3, the DART (iommu) must be allocated now since it
-	 * has an impact on htab_initialize (due to the large page it
-	 * occupies having to be broken up so the DART itself is not
-	 * part of the cacheable linar mapping
-	 */
-	alloc_dart_table();
 
 	hpte_init_native();
 	pm_power_off = maple_power_off;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 8dd78f4..19de197 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -605,14 +605,6 @@ static int __init pmac_probe(void)
 		return 0;
 
 #ifdef CONFIG_PPC64
-	/*
-	 * On U3, the DART (iommu) must be allocated now since it
-	 * has an impact on htab_initialize (due to the large page it
-	 * occupies having to be broken up so the DART itself is not
-	 * part of the cacheable linar mapping
-	 */
-	alloc_dart_table();
-
 	hpte_init_native();
 #endif
 
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index b734863..53f862e 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -48,16 +48,10 @@
 
 #include "dart.h"
 
-/* Physical base address and size of the DART table */
-unsigned long dart_tablebase; /* exported to htab_initialize */
+/* DART table address and size */
+static u32 *dart_tablebase;
 static unsigned long dart_tablesize;
 
-/* Virtual base address of the DART table */
-static u32 *dart_vbase;
-#ifdef CONFIG_PM
-static u32 *dart_copy;
-#endif
-
 /* Mapped base address for the dart */
 static unsigned int __iomem *dart;
 
@@ -151,6 +145,32 @@ wait_more:
 	spin_unlock_irqrestore(&invalidate_lock, flags);
 }
 
+static void dart_cache_sync(unsigned int *base, unsigned int count)
+{
+	/* We add 1 to the number of entries to flush, following a
+	 * comment in Darwin indicating that the memory controller
+	 * can prefetch unmapped memory under some circumstances
+	 */
+	unsigned long start = (unsigned long)base;
+	unsigned long end = start + (count + 1) * sizeof(unsigned int);
+	unsigned int tmp;
+
+	/* Perform a standard cache flush */
+	flush_inval_dcache_range(start, end);
+
+	/* Perform the sequence described in the CPC925 manual to
+	 * ensure all the data gets to a point the cache incoherent
+	 * DART hardware will see
+	 */
+	asm volatile(" sync;"
+		     " isync;"
+		     " dcbf 0,%1;"
+		     " sync;"
+		     " isync;"
+		     " lwz %0,0(%1);"
+		     " isync" : "=r" (tmp) : "r" (end) : "memory");
+}
+
 static void dart_flush(struct iommu_table *tbl)
 {
 	mb();
@@ -165,13 +185,13 @@ static int dart_build(struct iommu_table *tbl, long index,
 		       enum dma_data_direction direction,
 		       struct dma_attrs *attrs)
 {
-	unsigned int *dp;
+	unsigned int *dp, *orig_dp;
 	unsigned int rpn;
 	long l;
 
 	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
 
-	dp = ((unsigned int*)tbl->it_base) + index;
+	orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
 
 	/* On U3, all memory is contiguous, so we can move this
 	 * out of the loop.
@@ -184,11 +204,7 @@ static int dart_build(struct iommu_table *tbl, long index,
 
 		uaddr += DART_PAGE_SIZE;
 	}
-
-	/* make sure all updates have reached memory */
-	mb();
-	in_be32((unsigned __iomem *)dp);
-	mb();
+	dart_cache_sync(orig_dp, npages);
 
 	if (dart_is_u4) {
 		rpn = index;
@@ -203,7 +219,8 @@ static int dart_build(struct iommu_table *tbl, long index,
 
 static void dart_free(struct iommu_table *tbl, long index, long npages)
 {
-	unsigned int *dp;
+	unsigned int *dp, *orig_dp;
+	long orig_npages = npages;
 
 	/* We don't worry about flushing the TLB cache. The only drawback of
 	 * not doing it is that we won't catch buggy device drivers doing
@@ -212,34 +229,29 @@ static void dart_free(struct iommu_table *tbl, long index, long npages)
 
 	DBG("dart: free at: %lx, %lx\n", index, npages);
 
-	dp  = ((unsigned int *)tbl->it_base) + index;
+	orig_dp = dp  = ((unsigned int *)tbl->it_base) + index;
 
 	while (npages--)
 		*(dp++) = dart_emptyval;
-}
 
+	dart_cache_sync(orig_dp, orig_npages);
+}
 
-static int __init dart_init(struct device_node *dart_node)
+static void allocate_dart(void)
 {
-	unsigned int i;
-	unsigned long tmp, base, size;
-	struct resource r;
+	unsigned long tmp;
 
-	if (dart_tablebase == 0 || dart_tablesize == 0) {
-		printk(KERN_INFO "DART: table not allocated, using "
-		       "direct DMA\n");
-		return -ENODEV;
-	}
-
-	if (of_address_to_resource(dart_node, 0, &r))
-		panic("DART: can't get register base ! ");
+	/* 512 pages (2MB) is max DART tablesize. */
+	dart_tablesize = 1UL << 21;
 
-	/* Make sure nothing from the DART range remains in the CPU cache
-	 * from a previous mapping that existed before the kernel took
-	 * over
+	/* 16MB (1 << 24) alignment. We allocate a full 16Mb chunk since we
+	 * will blow up an entire large page anyway in the kernel mapping
 	 */
-	flush_dcache_phys_range(dart_tablebase,
-				dart_tablebase + dart_tablesize);
+	dart_tablebase = __va(memblock_alloc_base(1UL<<24,
+						  1UL<<24, 0x80000000L));
+
+	/* There is no point scanning the DART space for leaks*/
+	kmemleak_no_scan((void *)dart_tablebase);
 
 	/* Allocate a spare page to map all invalid DART pages. We need to do
 	 * that to work around what looks like a problem with the HT bridge
@@ -249,20 +261,50 @@ static int __init dart_init(struct device_node *dart_node)
 	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 					 DARTMAP_RPNMASK);
 
+	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
+}
+
+static int __init dart_init(struct device_node *dart_node)
+{
+	unsigned int i;
+	unsigned long base, size;
+	struct resource r;
+
+	/* IOMMU disabled by the user ? bail out */
+	if (iommu_is_off)
+		return 0;
+
+	/* Only use the DART if the machine has more than 1GB of RAM
+	 * or if requested with iommu=on on cmdline.
+	 *
+	 * 1GB of RAM is picked as limit because some default devices
+	 * (i.e. Airport Extreme) have 30 bit address range limits.
+	 */
+
+	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
+		return 0;
+
+	/* Get DART registers */
+	if (of_address_to_resource(dart_node, 0, &r))
+		panic("DART: can't get register base ! ");
+
 	/* Map in DART registers */
 	dart = ioremap(r.start, resource_size(&r));
 	if (dart == NULL)
 		panic("DART: Cannot map registers!");
 
-	/* Map in DART table */
-	dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize);
+	/* Allocate the DART and dummy page */
+	allocate_dart();
 
 	/* Fill initial table */
 	for (i = 0; i < dart_tablesize/4; i++)
-		dart_vbase[i] = dart_emptyval;
+		dart_tablebase[i] = dart_emptyval;
+
+	/* Push to memory */
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
 
 	/* Initialize DART with table base and enable it. */
-	base = dart_tablebase >> DART_PAGE_SHIFT;
+	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
 	size = dart_tablesize >> DART_PAGE_SHIFT;
 	if (dart_is_u4) {
 		size &= DART_SIZE_U4_SIZE_MASK;
@@ -301,7 +343,7 @@ static void iommu_table_dart_setup(void)
 	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
 
 	/* Initialize the common IOMMU code */
-	iommu_table_dart.it_base = (unsigned long)dart_vbase;
+	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
 	iommu_table_dart.it_index = 0;
 	iommu_table_dart.it_blocksize = 1;
 	iommu_table_dart.it_ops = &iommu_dart_ops;
@@ -404,75 +446,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
 }
 
 #ifdef CONFIG_PM
-static void iommu_dart_save(void)
-{
-	memcpy(dart_copy, dart_vbase, 2*1024*1024);
-}
-
 static void iommu_dart_restore(void)
 {
-	memcpy(dart_vbase, dart_copy, 2*1024*1024);
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
 	dart_tlb_invalidate_all();
 }
 
 static int __init iommu_init_late_dart(void)
 {
-	unsigned long tbasepfn;
-	struct page *p;
-
-	/* if no dart table exists then we won't need to save it
-	 * and the area has also not been reserved */
 	if (!dart_tablebase)
 		return 0;
 
-	tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT;
-	register_nosave_region_late(tbasepfn,
-				    tbasepfn + ((1<<24) >> PAGE_SHIFT));
-
-	/* For suspend we need to copy the dart contents because
-	 * it is not part of the regular mapping (see above) and
-	 * thus not saved automatically. The memory for this copy
-	 * must be allocated early because we need 2 MB. */
-	p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT);
-	BUG_ON(!p);
-	dart_copy = page_address(p);
-
-	ppc_md.iommu_save = iommu_dart_save;
 	ppc_md.iommu_restore = iommu_dart_restore;
 
 	return 0;
 }
 
 late_initcall(iommu_init_late_dart);
-#endif
-
-void __init alloc_dart_table(void)
-{
-	/* Only reserve DART space if machine has more than 1GB of RAM
-	 * or if requested with iommu=on on cmdline.
-	 *
-	 * 1GB of RAM is picked as limit because some default devices
-	 * (i.e. Airport Extreme) have 30 bit address range limits.
-	 */
-
-	if (iommu_is_off)
-		return;
-
-	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
-		return;
-
-	/* 512 pages (2MB) is max DART tablesize. */
-	dart_tablesize = 1UL << 21;
-	/* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
-	 * will blow up an entire large page anyway in the kernel mapping
-	 */
-	dart_tablebase = (unsigned long)
-		__va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
-	/*
-	 * The DART space is later unmapped from the kernel linear mapping and
-	 * accessing dart_tablebase during kmemleak scanning will fault.
-	 */
-	kmemleak_no_scan((void *)dart_tablebase);
-
-	printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
-}
+#endif /* CONFIG_PM */
-- 
2.7.4

  parent reply	other threads:[~2016-06-27 11:30 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-27 11:28 [PATCH 00/38] Reorganize setup code and merge 32 and 64-bit setup_arch() Benjamin Herrenschmidt
2016-06-27 11:28 ` [PATCH 01/38] dt: Add of_device_compatible_match() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 02/38] drm: Fix broken use of _PAGE_NO_CACHE on powerpc Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 03/38] powerpc/prom_init: PTRRELOC is not needed Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 04/38] powerpc: Make PTRRELOC() 32-bit only Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 05/38] powerpc: Factor do_feature_fixup calls Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 06/38] powerpc: Move 64-bit feature fixup earlier Benjamin Herrenschmidt
2016-06-28 11:05   ` Aneesh Kumar K.V
2016-06-28 11:14     ` Benjamin Herrenschmidt
2016-06-28 11:49       ` Aneesh Kumar K.V
2016-06-28 11:57         ` Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 07/38] powerpc: Move 64-bit memory reserves to setup_arch() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 08/38] powerpc: Move epapr_paravirt_early_init() to early_init_devtree() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 09/38] powerpc: Update obsolete comments in setup_32.c about entry conditions Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 10/38] powerpc: Add comment explaining the purpose of setup_kdump_trampoline() Benjamin Herrenschmidt
2016-06-27 11:29 ` Benjamin Herrenschmidt [this message]
2016-06-27 11:29 ` [PATCH 12/38] powerpc: Move FW feature probing out of pseries probe() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 13/38] powerpc: Put exception configuration in a common place Benjamin Herrenschmidt
2016-06-29  6:45   ` [PATCH v2 " Benjamin Herrenschmidt
2016-11-11 10:06   ` [PATCH " Anton Blanchard
2016-11-11 11:04     ` Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 14/38] powerpc/pmac: Remove early allocation of the SMU command buffer Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 15/38] powerpc/64: Move MMU backend selection out of platform code Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 16/38] powerpc/pasemi: Remove IOBMAP allocation from platform probe() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 17/38] powerpc/mm/hash: Don't use machine_is() early during boot Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 18/38] powerpc/rtas: Don't test for machine type in rtas_initialize() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 19/38] powerpc: Don't test for machine type in smp_setup_cpu_maps() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 20/38] powerpc/mm/hash64: Don't test for machine type to detect HEA special case Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 21/38] powerpc/pmac: Remove spurrious machine type test Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 22/38] powerpc/mm: Move hash table ops to a separate structure Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 23/38] powerpc: Ensure that ppc_md is empty before probing for machine type Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 24/38] powerpc: Move 64-bit probe_machine() to later in the boot process Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 25/38] powerpc/cell: Don't use flat device-tree after boot Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 26/38] powerpc/85xx/ge_imp3a: Don't use the " Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 27/38] powerpc/85xx/mpc85xx_ds: " Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 28/38] powerpc/85xx/mpc85xx_rdb: " Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 29/38] powerpc: Move 32-bit probe() machine to later in the boot process Benjamin Herrenschmidt
2016-06-27 20:42   ` Gerhard Pircher
2016-06-27 21:40     ` Benjamin Herrenschmidt
2016-06-28 11:42       ` Gerhard Pircher
2016-06-28 12:01         ` Benjamin Herrenschmidt
2016-06-28 13:25           ` Gerhard Pircher
2016-06-28 22:05             ` Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 30/38] powerpc: Get rid of ppc_md.init_early() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 31/38] powerpc/64: Move the boot time info banner to a separate function Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 32/38] powerpc/64: Move setting of {i, d}cache_bsize to initialize_cache_info() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 33/38] powerpc/64: Move the content of setup_system() to setup_arch() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 34/38] powerpc/32: Move cache info inits to a separate function Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 35/38] powerpc: Re-order the call to smp_setup_cpu_maps() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 36/38] powerpc: Re-order setup_panic() Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 37/38] powerpc/64: Make a few boot functions __init Benjamin Herrenschmidt
2016-06-27 11:29 ` [PATCH 38/38] powerpc: Merge 32-bit and 64-bit setup_arch() Benjamin Herrenschmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1467026976-7974-12-git-send-email-benh@kernel.crashing.org \
    --to=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).