LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC V7 PATCH 09/19] memory-hotplug: does not release memory region in PAGES_PER_SECTION chunks
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

Since applying a patch(de7f0cba96786c), release_mem_region() has been changed
as called in PAGES_PER_SECTION chunks because register_memory_resource() is
called in PAGES_PER_SECTION chunks by add_memory(). But it seems firmware
dependency. If CRS are written in the PAGES_PER_SECTION chunks in ACPI DSDT
Table, register_memory_resource() is called in PAGES_PER_SECTION chunks.
But if CRS are written in the DIMM unit in ACPI DSDT Table,
register_memory_resource() is called in DIMM unit. So release_mem_region()
should not be called in PAGES_PER_SECTION chunks. The patch fixes it.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   13 +++++++++----
 mm/memory_hotplug.c                             |    4 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 11d8e05..dc0a035 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -77,7 +77,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 {
 	unsigned long start, start_pfn;
 	struct zone *zone;
-	int ret;
+	int i, ret;
+	int sections_to_remove;
 
 	start_pfn = base >> PAGE_SHIFT;
 
@@ -97,9 +98,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	 * to sysfs "state" file and we can't remove sysfs entries
 	 * while writing to it. So we have to defer it to here.
 	 */
-	ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
-	if (ret)
-		return ret;
+	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+	for (i = 0; i < sections_to_remove; i++) {
+		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
+		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Update memory regions for memory remove
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 45b03b3..29aff4d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -358,11 +358,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
 	BUG_ON(nr_pages % PAGES_PER_SECTION);
 
+	release_mem_region(phys_start_pfn << PAGE_SHIFT,  nr_pages * PAGE_SIZE);
+
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
-		release_mem_region(pfn << PAGE_SHIFT,
-				   PAGES_PER_SECTION << PAGE_SHIFT);
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 07/19] memory-hotplug: call acpi_bus_remove() to remove memory device
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

The memory device has been ejected and powoffed, so we can call
acpi_bus_remove() to remove the memory device from acpi bus.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/acpi/acpi_memhotplug.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 9d47458..b152767 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -425,8 +425,9 @@ static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
 		}
 
 		/*
-		 * TBD: Invoke acpi_bus_remove to cleanup data structures
+		 * Invoke acpi_bus_remove() to remove memory device
 		 */
+		acpi_bus_remove(device, 1);
 
 		/* _EJ0 succeeded; _OST is not necessary */
 		return;
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 10/19] memory-hotplug: add memory_block_release
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When calling remove_memory_block(), the function shows following message at
device_release().

Device 'memory528' does not have a release() function, it is broken and must
be fixed.

remove_memory_block() calls kfree(mem). I think it shouled be called from
device_release(). So the patch implements memory_block_release()

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 drivers/base/memory.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 038be73..1cd3ef3 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -109,6 +109,15 @@ bool is_memblk_offline(unsigned long start, unsigned long size)
 }
 EXPORT_SYMBOL(is_memblk_offline);
 
+#define to_memory_block(device) container_of(device, struct memory_block, dev)
+
+static void release_memory_block(struct device *dev)
+{
+	struct memory_block *mem = to_memory_block(dev);
+
+	kfree(mem);
+}
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
@@ -119,6 +128,7 @@ int register_memory(struct memory_block *memory)
 
 	memory->dev.bus = &memory_subsys;
 	memory->dev.id = memory->start_section_nr / sections_per_block;
+	memory->dev.release = release_memory_block;
 
 	error = device_register(&memory->dev);
 	return error;
@@ -674,7 +684,6 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
 		mem_remove_simple_file(mem, phys_device);
 		mem_remove_simple_file(mem, removable);
 		unregister_memory(mem);
-		kfree(mem);
 	} else
 		kobject_put(&mem->dev.kobj);
 
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 11/19] memory-hotplug: remove_memory calls __remove_pages
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

The patch adds __remove_pages() to remove_memory(). Then the range of
phys_start_pfn argument and nr_pages argument in __remove_pagse() may
have different zone. So zone argument is removed from __remove_pages()
and __remove_pages() caluculates zone in each section.

When CONFIG_SPARSEMEM_VMEMMAP is defined, there is no way to remove a memmap.
So __remove_section only calls unregister_memory_section().

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |    5 +----
 include/linux/memory_hotplug.h                  |    3 +--
 mm/memory_hotplug.c                             |   18 +++++++++++-------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index dc0a035..cc14da4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -76,7 +76,6 @@ unsigned long memory_block_size_bytes(void)
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
-	struct zone *zone;
 	int i, ret;
 	int sections_to_remove;
 
@@ -87,8 +86,6 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 		return 0;
 	}
 
-	zone = page_zone(pfn_to_page(start_pfn));
-
 	/*
 	 * Remove section mappings and sysfs entries for the
 	 * section of the memory we are removing.
@@ -101,7 +98,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
-		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		ret = __remove_pages(start_pfn,  PAGES_PER_SECTION);
 		if (ret)
 			return ret;
 	}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index fd84ea9..8bf820d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -90,8 +90,7 @@ extern bool is_pageblock_removable_nolock(struct page *page);
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
-extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages);
+extern int __remove_pages(unsigned long start_pfn, unsigned long nr_pages);
 
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 29aff4d..713f1b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -275,11 +275,14 @@ static int __meminit __add_section(int nid, struct zone *zone,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
-	/*
-	 * XXX: Freeing memmap with vmemmap is not implement yet.
-	 *      This should be removed later.
-	 */
-	return -EBUSY;
+	int ret = -EINVAL;
+
+	if (!valid_section(ms))
+		return ret;
+
+	ret = unregister_memory_section(ms);
+
+	return ret;
 }
 #else
 static int __remove_section(struct zone *zone, struct mem_section *ms)
@@ -346,11 +349,11 @@ EXPORT_SYMBOL_GPL(__add_pages);
  * sure that pages are marked reserved and zones are adjust properly by
  * calling offline_pages().
  */
-int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+int __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages)
 {
 	unsigned long i, ret = 0;
 	int sections_to_remove;
+	struct zone *zone;
 
 	/*
 	 * We can only remove entire sections
@@ -363,6 +366,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		zone = page_zone(pfn_to_page(pfn));
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 15/19] memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

For removing memmap region of sparse-vmemmap which is allocated bootmem,
memmap region of sparse-vmemmap needs to be registered by get_page_bootmem().
So the patch searches pages of virtual mapping and registers the pages by
get_page_bootmem().

Note: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390,
and sparc.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 arch/ia64/mm/discontig.c       |    6 ++++
 arch/powerpc/mm/init_64.c      |    6 ++++
 arch/s390/mm/vmem.c            |    6 ++++
 arch/sparc/mm/init_64.c        |    6 ++++
 arch/x86/mm/init_64.c          |   52 ++++++++++++++++++++++++++++++++++++++++
 include/linux/memory_hotplug.h |    2 +
 include/linux/mm.h             |    3 +-
 mm/memory_hotplug.c            |   31 +++++++++++++++++++++--
 8 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c641333..33943db 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -822,4 +822,10 @@ int __meminit vmemmap_populate(struct page *start_page,
 {
 	return vmemmap_populate_basepages(start_page, size, node);
 }
+
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	/* TODO */
+}
 #endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 620b7ac..3690c44 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -298,5 +298,11 @@ int __meminit vmemmap_populate(struct page *start_page,
 
 	return 0;
 }
+
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	/* TODO */
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6f896e7..eda55cd 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -227,6 +227,12 @@ out:
 	return ret;
 }
 
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	/* TODO */
+}
+
 /*
  * Add memory segment to the segment list if it doesn't overlap with
  * an already present segment.
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d58edf5..add1cc7 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2077,6 +2077,12 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
+
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	/* TODO */
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e0d88ba..0075592 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1138,6 +1138,58 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (; addr < end; addr = next) {
+		pte_t *pte = NULL;
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+		if (!cpu_has_pse) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 MIX_SECTION_INFO);
+
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte))
+				continue;
+			get_page_bootmem(section_nr, pte_page(*pte),
+					 SECTION_INFO);
+		} else {
+			next = pmd_addr_end(addr, end);
+
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 SECTION_INFO);
+		}
+	}
+}
+
 void __meminit vmemmap_populate_print_last(void)
 {
 	if (p_start) {
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 1133e63..2d18235 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -164,6 +164,8 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 
 extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
+extern void get_page_bootmem(unsigned long ingo, struct page *page,
+			     unsigned long type);
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be90..c607913 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1618,7 +1618,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 						unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
-
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+				  unsigned long size);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3ca66bc..d8495ff 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -91,8 +91,8 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-static void get_page_bootmem(unsigned long info,  struct page *page,
-			     unsigned long type)
+void get_page_bootmem(unsigned long info,  struct page *page,
+		      unsigned long type)
 {
 	unsigned long page_type;
 
@@ -164,8 +164,33 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 
 }
 #else
-static inline void register_page_bootmem_info_section(unsigned long start_pfn)
+static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
+	unsigned long *usemap, mapsize, section_nr, i;
+	struct mem_section *ms;
+	struct page *page, *memmap;
+
+	if (!pfn_valid(start_pfn))
+		return;
+
+	section_nr = pfn_to_section_nr(start_pfn);
+	ms = __nr_to_section(section_nr);
+
+	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+	page = virt_to_page(memmap);
+	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
+	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
+
+	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
+
+	usemap = __nr_to_section(section_nr)->pageblock_flags;
+	page = virt_to_page(usemap);
+
+	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
+
+	for (i = 0; i < mapsize; i++, page++)
+		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 }
 #endif
 
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 12/19] memory-hotplug: introduce new function arch_remove_memory()
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

We don't call __add_pages() directly in the function add_memory()
because some other architecture related things need to be done
before or after calling __add_pages(). So we should introduce
a new function arch_remove_memory() to revert the things
done in arch_add_memory().

Note: the function for s390 is not implemented(I don't know how to
implement it for s390).

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 arch/ia64/mm/init.c                  |   16 ++++
 arch/powerpc/mm/mem.c                |   14 +++
 arch/s390/mm/init.c                  |   12 +++
 arch/sh/mm/init.c                    |   15 +++
 arch/tile/mm/init.c                  |    8 ++
 arch/x86/include/asm/pgtable_types.h |    1 +
 arch/x86/mm/init_32.c                |   10 ++
 arch/x86/mm/init_64.c                |  160 ++++++++++++++++++++++++++++++++++
 arch/x86/mm/pageattr.c               |   47 +++++-----
 include/linux/memory_hotplug.h       |    1 +
 mm/memory_hotplug.c                  |    1 +
 11 files changed, 263 insertions(+), 22 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0eab454..1e345ed 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -688,6 +688,22 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return ret;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __remove_pages(start_pfn, nr_pages);
+	if (ret)
+		pr_warn("%s: Problem encountered in __remove_pages() as"
+			" ret=%d\n", __func__,  ret);
+
+	return ret;
+}
+#endif
 #endif
 
 /*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index baaafde..249cef4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -133,6 +133,20 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	start = (unsigned long)__va(start);
+	if (remove_section_mapping(start, start + size))
+		return -EINVAL;
+
+	return __remove_pages(start_pfn, nr_pages);
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6adbc08..501b20e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -257,4 +257,16 @@ int arch_add_memory(int nid, u64 start, u64 size)
 		vmem_remove_mapping(start, size);
 	return rc;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	/*
+	 * There is no hardware or firmware interface which could trigger a
+	 * hot memory remove on s390. So there is nothing that needs to be
+	 * implemented.
+	 */
+	return -EBUSY;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 82cc576..fc84491 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -558,4 +558,19 @@ int memory_add_physaddr_to_nid(u64 addr)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __remove_pages(start_pfn, nr_pages);
+	if (unlikely(ret))
+		pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
+			ret);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index ef29d6c..2749515 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -935,6 +935,14 @@ int remove_memory(u64 start, u64 size)
 {
 	return -EINVAL;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	/* TODO */
+	return -EBUSY;
+}
+#endif
 #endif
 
 struct kmem_cache *pgd_cache;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 013286a..b725af2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -334,6 +334,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
  * as a pte too.
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f..41eefe8 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -842,6 +842,16 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	return __remove_pages(start_pfn, nr_pages);
+}
+#endif
 #endif
 
 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2b6b4a3..e0d88ba 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -675,6 +675,166 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+static void __meminit
+phys_pte_remove(pte_t *pte_page, unsigned long addr, unsigned long end)
+{
+	unsigned pages = 0;
+	int i = pte_index(addr);
+
+	pte_t *pte = pte_page + pte_index(addr);
+
+	for (; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
+
+		if (addr >= end)
+			break;
+
+		if (!pte_present(*pte))
+			continue;
+
+		pages++;
+		set_pte(pte, __pte(0));
+	}
+
+	update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+phys_pmd_remove(pmd_t *pmd_page, unsigned long addr, unsigned long end)
+{
+	unsigned long pages = 0, next;
+	int i = pmd_index(addr);
+
+	for (; i < PTRS_PER_PMD; i++, addr = next) {
+		unsigned long pte_phys;
+		pmd_t *pmd = pmd_page + pmd_index(addr);
+		pte_t *pte;
+
+		if (addr >= end)
+			break;
+
+		next = (addr & PMD_MASK) + PMD_SIZE;
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_large(*pmd)) {
+			if ((addr & ~PMD_MASK) == 0 && next <= end) {
+				set_pmd(pmd, __pmd(0));
+				pages++;
+				continue;
+			}
+
+			/*
+			 * We use 2M page, but we need to remove part of them,
+			 * so split 2M page to 4K page.
+			 */
+			pte = alloc_low_page(&pte_phys);
+			__split_large_page((pte_t *)pmd, addr, pte);
+
+			spin_lock(&init_mm.page_table_lock);
+			pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+			spin_unlock(&init_mm.page_table_lock);
+		}
+
+		spin_lock(&init_mm.page_table_lock);
+		pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+		phys_pte_remove(pte, addr, end);
+		unmap_low_page(pte);
+		spin_unlock(&init_mm.page_table_lock);
+	}
+	update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+phys_pud_remove(pud_t *pud_page, unsigned long addr, unsigned long end)
+{
+	unsigned long pages = 0, next;
+	int i = pud_index(addr);
+
+	for (; i < PTRS_PER_PUD; i++, addr = next) {
+		unsigned long pmd_phys;
+		pud_t *pud = pud_page + pud_index(addr);
+		pmd_t *pmd;
+
+		if (addr >= end)
+			break;
+
+		next = (addr & PUD_MASK) + PUD_SIZE;
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_large(*pud)) {
+			if ((addr & ~PUD_MASK) == 0 && next <= end) {
+				set_pud(pud, __pud(0));
+				pages++;
+				continue;
+			}
+
+			/*
+			 * We use 1G page, but we need to remove part of them,
+			 * so split 1G page to 2M page.
+			 */
+			pmd = alloc_low_page(&pmd_phys);
+			__split_large_page((pte_t *)pud, addr, (pte_t *)pmd);
+
+			spin_lock(&init_mm.page_table_lock);
+			pud_populate(&init_mm, pud, __va(pmd_phys));
+			spin_unlock(&init_mm.page_table_lock);
+		}
+
+		pmd = map_low_page(pmd_offset(pud, 0));
+		phys_pmd_remove(pmd, addr, end);
+		unmap_low_page(pmd);
+		__flush_tlb_all();
+	}
+	__flush_tlb_all();
+
+	update_page_count(PG_LEVEL_1G, -pages);
+}
+
+void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+	unsigned long next;
+
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
+
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		next = (start + PGDIR_SIZE) & PGDIR_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pgd_present(*pgd))
+			continue;
+
+		pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+		phys_pud_remove(pud, __pa(start), __pa(end));
+		unmap_low_page(pud);
+	}
+
+	__flush_tlb_all();
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __remove_pages(start_pfn, nr_pages);
+	WARN_ON_ONCE(ret);
+
+	kernel_physical_mapping_remove(start, start + size);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a..c22963d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -501,21 +501,13 @@ out_unlock:
 	return do_split;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
 {
 	unsigned long pfn, pfninc = 1;
 	unsigned int i, level;
-	pte_t *pbase, *tmp;
+	pte_t *tmp;
 	pgprot_t ref_prot;
-	struct page *base;
-
-	if (!debug_pagealloc)
-		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-	if (!debug_pagealloc)
-		spin_lock(&cpa_lock);
-	if (!base)
-		return -ENOMEM;
+	struct page *base = virt_to_page(pbase);
 
 	spin_lock(&pgd_lock);
 	/*
@@ -523,10 +515,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte)
-		goto out_unlock;
+	if (tmp != kpte) {
+		spin_unlock(&pgd_lock);
+		return 1;
+	}
 
-	pbase = (pte_t *)page_address(base);
 	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 	/*
@@ -579,17 +572,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * going on.
 	 */
 	__flush_tlb_all();
+	spin_unlock(&pgd_lock);
 
-	base = NULL;
+	return 0;
+}
 
-out_unlock:
-	/*
-	 * If we dropped out via the lookup_address check under
-	 * pgd_lock then stick the page back into the pool:
-	 */
-	if (base)
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+	pte_t *pbase;
+	struct page *base;
+
+	if (!debug_pagealloc)
+		spin_unlock(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+	if (!debug_pagealloc)
+		spin_lock(&cpa_lock);
+	if (!base)
+		return -ENOMEM;
+
+	pbase = (pte_t *)page_address(base);
+	if (__split_large_page(kpte, address, pbase))
 		__free_page(base);
-	spin_unlock(&pgd_lock);
 
 	return 0;
 }
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8bf820d..cdbbd79 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -85,6 +85,7 @@ extern void __online_page_free(struct page *page);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
+extern int arch_remove_memory(u64 start, u64 size);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 /* reasonably generic interface to expand the physical pages in a zone  */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 713f1b9..5f9f8c7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1075,6 +1075,7 @@ int __ref remove_memory(int nid, u64 start, u64 size)
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
 
+	arch_remove_memory(start, size);
 out:
 	unlock_memory_hotplug();
 	return ret;
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 14/19] memory-hotplug: move register_page_bootmem_info_node and put_page_bootmem for sparse-vmemmap
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

For implementing register_page_bootmem_info_node of sparse-vmemmap,
register_page_bootmem_info_node and put_page_bootmem are moved to
memory_hotplug.c

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 include/linux/memory_hotplug.h |    9 ---------
 mm/memory_hotplug.c            |    8 ++++++--
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index cdbbd79..1133e63 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -162,17 +162,8 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
-{
-}
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
 extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
-#endif
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index d85af6d..3ca66bc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -91,7 +91,6 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
@@ -127,6 +126,7 @@ void __ref put_page_bootmem(struct page *page)
 
 }
 
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
@@ -163,6 +163,11 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
+#else
+static inline void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+}
+#endif
 
 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -198,7 +203,6 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 		register_page_bootmem_info_section(pfn);
 
 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 			   unsigned long end_pfn)
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 08/19] memory-hotplug: remove /sys/firmware/memmap/X sysfs
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When (hot)adding memory into system, /sys/firmware/memmap/X/{end, start, type}
sysfs files are created. But there is no code to remove these files. The patch
implements the function to remove them.

Note : The code does not free firmware_map_entry since there is no way to free
       memory which is allocated by bootmem.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 drivers/firmware/memmap.c    |   78 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/firmware-map.h |    6 +++
 mm/memory_hotplug.c          |    9 ++++-
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index c1cdc92..b2e7e5e 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 /*
  * Data types ------------------------------------------------------------------
@@ -79,7 +80,22 @@ static const struct sysfs_ops memmap_attr_ops = {
 	.show = memmap_attr_show,
 };
 
+#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
+
+static void release_firmware_map_entry(struct kobject *kobj)
+{
+	struct firmware_map_entry *entry = to_memmap_entry(kobj);
+	struct page *page;
+
+	page = virt_to_page(entry);
+	if (PageSlab(page) || PageCompound(page))
+		kfree(entry);
+
+	/* There is no way to free memory allocated from bootmem*/
+}
+
 static struct kobj_type memmap_ktype = {
+	.release	= release_firmware_map_entry,
 	.sysfs_ops	= &memmap_attr_ops,
 	.default_attrs	= def_attrs,
 };
@@ -123,6 +139,16 @@ static int firmware_map_add_entry(u64 start, u64 end,
 	return 0;
 }
 
+/**
+ * firmware_map_remove_entry() - Does the real work to remove a firmware
+ * memmap entry.
+ * @entry: removed entry.
+ **/
+static inline void firmware_map_remove_entry(struct firmware_map_entry *entry)
+{
+	list_del(&entry->list);
+}
+
 /*
  * Add memmap entry on sysfs
  */
@@ -144,6 +170,31 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry)
 	return 0;
 }
 
+/*
+ * Remove memmap entry on sysfs
+ */
+static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry)
+{
+	kobject_put(&entry->kobj);
+}
+
+/*
+ * Search memmap entry
+ */
+
+struct firmware_map_entry * __meminit
+find_firmware_map_entry(u64 start, u64 end, const char *type)
+{
+	struct firmware_map_entry *entry;
+
+	list_for_each_entry(entry, &map_entries, list)
+		if ((entry->start == start) && (entry->end == end) &&
+		    (!strcmp(entry->type, type)))
+			return entry;
+
+	return NULL;
+}
+
 /**
  * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do
  * memory hotplug.
@@ -196,6 +247,32 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 	return firmware_map_add_entry(start, end, type, entry);
 }
 
+/**
+ * firmware_map_remove() - remove a firmware mapping entry
+ * @start: Start of the memory range.
+ * @end:   End of the memory range.
+ * @type:  Type of the memory range.
+ *
+ * removes a firmware mapping entry.
+ *
+ * Returns 0 on success, or -EINVAL if no entry.
+ **/
+int __meminit firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	struct firmware_map_entry *entry;
+
+	entry = find_firmware_map_entry(start, end - 1, type);
+	if (!entry)
+		return -EINVAL;
+
+	firmware_map_remove_entry(entry);
+
+	/* remove the memmap entry */
+	remove_sysfs_fw_map_entry(entry);
+
+	return 0;
+}
+
 /*
  * Sysfs functions -------------------------------------------------------------
  */
@@ -218,7 +295,6 @@ static ssize_t type_show(struct firmware_map_entry *entry, char *buf)
 }
 
 #define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr)
-#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
 
 static ssize_t memmap_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 43fe52f..71d4fa7 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -25,6 +25,7 @@
 
 int firmware_map_add_early(u64 start, u64 end, const char *type);
 int firmware_map_add_hotplug(u64 start, u64 end, const char *type);
+int firmware_map_remove(u64 start, u64 end, const char *type);
 
 #else /* CONFIG_FIRMWARE_MEMMAP */
 
@@ -38,6 +39,11 @@ static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 	return 0;
 }
 
+static inline int firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	return 0;
+}
+
 #endif /* CONFIG_FIRMWARE_MEMMAP */
 
 #endif /* _LINUX_FIRMWARE_MAP_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3f1d7c5..45b03b3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1052,9 +1052,9 @@ int offline_memory(u64 start, u64 size)
 	return 0;
 }
 
-int remove_memory(int nid, u64 start, u64 size)
+int __ref remove_memory(int nid, u64 start, u64 size)
 {
-	int ret = -EBUSY;
+	int ret = 0;
 	lock_memory_hotplug();
 	/*
 	 * The memory might become online by other task, even if you offine it.
@@ -1065,8 +1065,13 @@ int remove_memory(int nid, u64 start, u64 size)
 			"because the memmory range is online\n",
 			start, start + size);
 		ret = -EAGAIN;
+		goto out;
 	}
 
+	/* remove memmap entry */
+	firmware_map_remove(start, start + size, "System RAM");
+
+out:
 	unlock_memory_hotplug();
 	return ret;
 
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 13/19] memory-hotplug: check page type in get_page_bootmem
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

There is a possibility that get_page_bootmem() is called to the same page many
times. So when get_page_bootmem is called to the same page, the function only
increments page->_count.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |   15 +++++++++++----
 1 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5f9f8c7..d85af6d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -95,10 +95,17 @@ static void release_memory_resource(struct resource *res)
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
-	page->lru.next = (struct list_head *) type;
-	SetPagePrivate(page);
-	set_page_private(page, info);
-	atomic_inc(&page->_count);
+	unsigned long page_type;
+
+	page_type = (unsigned long) page->lru.next;
+	if (page_type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	    page_type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE){
+		page->lru.next = (struct list_head *) type;
+		SetPagePrivate(page);
+		set_page_private(page, info);
+		atomic_inc(&page->_count);
+	} else
+		atomic_inc(&page->_count);
 }
 
 /* reference to __meminit __free_pages_bootmem is valid
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 18/19] memory-hotplug: add node_device_release
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When calling unregister_node(), the function shows following message at
device_release().

Device 'node2' does not have a release() function, it is broken and must be
fixed.

So the patch implements node_device_release()

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/base/node.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index af1a177..9bc2f57 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -252,6 +252,13 @@ static inline void hugetlb_register_node(struct node *node) {}
 static inline void hugetlb_unregister_node(struct node *node) {}
 #endif
 
+static void node_device_release(struct device *dev)
+{
+	struct node *node_dev = to_node(dev);
+
+	flush_work(&node_dev->node_work);
+	memset(node_dev, 0, sizeof(struct node));
+}
 
 /*
  * register_node - Setup a sysfs device for a node.
@@ -265,6 +272,7 @@ int register_node(struct node *node, int num, struct node *parent)
 
 	node->dev.id = num;
 	node->dev.bus = &node_subsys;
+	node->dev.release = node_device_release;
 	error = device_register(&node->dev);
 
 	if (!error){
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 16/19] memory-hotplug: free memmap of sparse-vmemmap
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

All pages of virtual mapping in removed memory cannot be freed, since some pages
used as PGD/PUD includes not only removed memory but also other memory. So the
patch checks whether page can be freed or not.

How to check whether page can be freed or not?
 1. When removing memory, the page structs of the revmoved memory are filled
    with 0FD.
 2. All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared.
    In this case, the page used as PT/PMD can be freed.

Applying patch, __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is integrated
into one. So __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is deleted.

Note:  vmemmap_kfree() and vmemmap_free_bootmem() are not implemented for ia64,
ppc, s390, and sparc.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/ia64/mm/discontig.c  |    8 +++
 arch/powerpc/mm/init_64.c |    8 +++
 arch/s390/mm/vmem.c       |    8 +++
 arch/sparc/mm/init_64.c   |    8 +++
 arch/x86/mm/init_64.c     |  119 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h        |    2 +
 mm/memory_hotplug.c       |   17 +------
 mm/sparse.c               |    5 +-
 8 files changed, 158 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 33943db..0d23b69 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -823,6 +823,14 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return vmemmap_populate_basepages(start_page, size, node);
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3690c44..835a2b3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -299,6 +299,14 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return 0;
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index eda55cd..4b42b0b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -227,6 +227,14 @@ out:
 	return ret;
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index add1cc7..1384826 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2078,6 +2078,14 @@ void __meminit vmemmap_populate_print_last(void)
 	}
 }
 
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0075592..4e8f8a4 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1138,6 +1138,125 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+#define PAGE_INUSE 0xFD
+
+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
+			    struct page **pp, int *page_size)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	void *page_addr;
+	unsigned long next;
+
+	*pp = NULL;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return pgd_addr_end(addr, end);
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return pud_addr_end(addr, end);
+
+	if (!cpu_has_pse) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		pte = pte_offset_kernel(pmd, addr);
+		if (pte_none(*pte))
+			return next;
+
+		*page_size = PAGE_SIZE;
+		*pp = pte_page(*pte);
+	} else {
+		next = pmd_addr_end(addr, end);
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		*page_size = PMD_SIZE;
+		*pp = pmd_page(*pmd);
+	}
+
+	/*
+	 * Removed page structs are filled with 0xFD.
+	 */
+	memset((void *)addr, PAGE_INUSE, next - addr);
+
+	page_addr = page_address(*pp);
+
+	/*
+	 * Check the page is filled with 0xFD or not.
+	 * memchr_inv() returns the address. In this case, we cannot
+	 * clear PTE/PUD entry, since the page is used by other.
+	 * So we cannot also free the page.
+	 *
+	 * memchr_inv() returns NULL. In this case, we can clear
+	 * PTE/PUD entry, since the page is not used by other.
+	 * So we can also free the page.
+	 */
+	if (memchr_inv(page_addr, PAGE_INUSE, *page_size)) {
+		*pp = NULL;
+		return next;
+	}
+
+	if (!cpu_has_pse)
+		pte_clear(&init_mm, addr, pte);
+	else
+		pmd_clear(pmd);
+
+	return next;
+}
+
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	struct page *page;
+	int page_size;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		page_size = 0;
+		next = find_and_clear_pte_page(addr, end, &page, &page_size);
+		if (!page)
+			continue;
+
+		free_pages((unsigned long)page_address(page),
+			    get_order(page_size));
+		__flush_tlb_one(addr);
+	}
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	struct page *page;
+	int page_size;
+	unsigned long magic;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		page_size = 0;
+		next = find_and_clear_pte_page(addr, end, &page, &page_size);
+		if (!page)
+			continue;
+
+		magic = (unsigned long) page->lru.next;
+		if (magic == SECTION_INFO)
+			put_page_bootmem(page);
+		flush_tlb_kernel_range(addr, end);
+	}
+}
+
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c607913..fb0d1fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1620,6 +1620,8 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long size);
+void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
+void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index d8495ff..3aa0766 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -308,19 +308,6 @@ static int __meminit __add_section(int nid, struct zone *zone,
 	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static int __remove_section(struct zone *zone, struct mem_section *ms)
-{
-	int ret = -EINVAL;
-
-	if (!valid_section(ms))
-		return ret;
-
-	ret = unregister_memory_section(ms);
-
-	return ret;
-}
-#else
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
 	unsigned long flags;
@@ -337,9 +324,9 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
 	pgdat_resize_lock(pgdat, &flags);
 	sparse_remove_one_section(zone, ms);
 	pgdat_resize_unlock(pgdat, &flags);
-	return 0;
+
+	return ret;
 }
-#endif
 
 /*
  * Reasonably generic function for adding memory.  It is
diff --git a/mm/sparse.c b/mm/sparse.c
index fac95f2..ab9d755 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -613,12 +613,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
 	/* This will make the necessary allocations eventually. */
 	return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
 {
-	return; /* XXX: Not implemented yet */
+	vmemmap_kfree(page, nr_pages);
 }
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
+	vmemmap_free_bootmem(page, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 17/19] memory_hotplug: clear zone when the memory is removed
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When a memory is added, we update zone's and pgdat's start_pfn and spanned_pages
in the function __add_zone(). So we should revert these when the memory is
removed. Add a new function __remove_zone() to do this.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |  181 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 181 insertions(+), 0 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3aa0766..00c050f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -308,10 +308,187 @@ static int __meminit __add_section(int nid, struct zone *zone,
 	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
+/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
+static int find_smallest_section_pfn(unsigned long start_pfn,
+				     unsigned long end_pfn)
+{
+	struct mem_section *ms;
+
+	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(start_pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		return start_pfn;
+	}
+
+	return 0;
+}
+
+/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
+static int find_biggest_section_pfn(unsigned long start_pfn,
+				    unsigned long end_pfn)
+{
+	struct mem_section *ms;
+	unsigned long pfn;
+
+	/* pfn is the end pfn of a memory section. */
+	pfn = end_pfn - 1;
+	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		return pfn;
+	}
+
+	return 0;
+}
+
+static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
+			     unsigned long end_pfn)
+{
+	unsigned long zone_start_pfn =  zone->zone_start_pfn;
+	unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long pfn;
+	struct mem_section *ms;
+
+	zone_span_writelock(zone);
+	if (zone_start_pfn == start_pfn) {
+		/*
+		 * If the section is smallest section in the zone, it need
+		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
+		 * In this case, we find second smallest valid mem_section
+		 * for shrinking zone.
+		 */
+		pfn = find_smallest_section_pfn(end_pfn, zone_end_pfn);
+		if (pfn) {
+			zone->zone_start_pfn = pfn;
+			zone->spanned_pages = zone_end_pfn - pfn;
+		}
+	} else if (zone_end_pfn == end_pfn) {
+		/*
+		 * If the section is biggest section in the zone, it need
+		 * shrink zone->spanned_pages.
+		 * In this case, we find second biggest valid mem_section for
+		 * shrinking zone.
+		 */
+		pfn = find_biggest_section_pfn(zone_start_pfn, start_pfn);
+		if (pfn)
+			zone->spanned_pages = pfn - zone_start_pfn + 1;
+	}
+
+	/*
+	 * The section is not biggest or smallest mem_section in the zone, it
+	 * only creates a hole in the zone. So in this case, we need not
+	 * change the zone. But perhaps, the zone has only hole data. Thus
+	 * it check the zone has only hole or not.
+	 */
+	pfn = zone_start_pfn;
+	for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		 /* If the section is current section, it continues the loop */
+		if (start_pfn == pfn)
+			continue;
+
+		/* If we find valid section, we have nothing to do */
+		zone_span_writeunlock(zone);
+		return;
+	}
+
+	/* The zone has no valid section */
+	zone->zone_start_pfn = 0;
+	zone->spanned_pages = 0;
+	zone_span_writeunlock(zone);
+}
+
+static void shrink_pgdat_span(struct pglist_data *pgdat,
+			      unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pgdat_start_pfn =  pgdat->node_start_pfn;
+	unsigned long pgdat_end_pfn =
+		pgdat->node_start_pfn + pgdat->node_spanned_pages;
+	unsigned long pfn;
+	struct mem_section *ms;
+
+	if (pgdat_start_pfn == start_pfn) {
+		/*
+		 * If the section is smallest section in the pgdat, it need
+		 * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
+		 * In this case, we find second smallest valid mem_section
+		 * for shrinking zone.
+		 */
+		pfn = find_smallest_section_pfn(end_pfn, pgdat_end_pfn);
+		if (pfn) {
+			pgdat->node_start_pfn = pfn;
+			pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
+		}
+	} else if (pgdat_end_pfn == end_pfn) {
+		/*
+		 * If the section is biggest section in the pgdat, it need
+		 * shrink pgdat->node_spanned_pages.
+		 * In this case, we find second biggest valid mem_section for
+		 * shrinking zone.
+		 */
+		pfn = find_biggest_section_pfn(pgdat_start_pfn, start_pfn);
+		if (pfn)
+			pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
+	}
+
+	/*
+	 * If the section is not biggest or smallest mem_section in the pgdat,
+	 * it only creates a hole in the pgdat. So in this case, we need not
+	 * change the pgdat.
+	 * But perhaps, the pgdat has only hole data. Thus it check the pgdat
+	 * has only hole or not.
+	 */
+	pfn = pgdat_start_pfn;
+	for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		 /* If the section is current section, it continues the loop */
+		if (start_pfn == pfn)
+			continue;
+
+		/* If we find valid section, we have nothing to do */
+		return;
+	}
+
+	/* The pgdat has no valid section */
+	pgdat->node_start_pfn = 0;
+	pgdat->node_spanned_pages = 0;
+}
+
+static void __remove_zone(struct zone *zone, unsigned long start_pfn)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nr_pages = PAGES_PER_SECTION;
+	int zone_type;
+	unsigned long flags;
+
+	zone_type = zone - pgdat->node_zones;
+
+	pgdat_resize_lock(zone->zone_pgdat, &flags);
+	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
+	pgdat_resize_unlock(zone->zone_pgdat, &flags);
+}
+
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
 	unsigned long flags;
 	struct pglist_data *pgdat = zone->zone_pgdat;
+	unsigned long start_pfn;
+	int scn_nr;
 	int ret = -EINVAL;
 
 	if (!valid_section(ms))
@@ -321,6 +498,10 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
 	if (ret)
 		return ret;
 
+	scn_nr = __section_nr(ms);
+	start_pfn = section_nr_to_pfn(scn_nr);
+	__remove_zone(zone, start_pfn);
+
 	pgdat_resize_lock(pgdat, &flags);
 	sparse_remove_one_section(zone, ms);
 	pgdat_resize_unlock(pgdat, &flags);
-- 
1.7.1

^ permalink raw reply related

* [RFC V7 PATCH 19/19] memory-hotplug: remove sysfs file of node
From: wency @ 2012-08-20  9:35 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1345455342-27752-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

The patch adds node_set_offline() and unregister_one_node() to remove_memory()
for removing sysfs file of node.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 00c050f..e5928c7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1279,6 +1279,11 @@ int __ref remove_memory(int nid, u64 start, u64 size)
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
 
+	if (!node_present_pages(nid)) {
+		node_set_offline(nid);
+		unregister_one_node(nid);
+	}
+
 	arch_remove_memory(start, size);
 out:
 	unlock_memory_hotplug();
-- 
1.7.1

^ permalink raw reply related

* [PATCH V8] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Jia Hongtao @ 2012-08-20  9:55 UTC (permalink / raw)
  To: linuxppc-dev, galak; +Cc: B07421, b38951

We unified the Freescale pci/pcie initialization by changing the fsl_pci
to a platform driver. In previous PCI code architecture the initialization
routine is called at board_setup_arch stage. Now the initialization is done
in probe function which is architectural better. Also It's convenient for
adding PM support for PCI controller in later patch.

Now we registered pci controllers as platform devices. So we combine two
initialization code as one platform driver.

Signed-off-by: Jia Hongtao <B38951@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
---
Changes for V8:
* Use previous primary determination. Based on the point that there are
  bugs on primary-less system.
* Add exceptional support on ge_imp3a in which the primary bus is not the
  first pci bus detected.

 arch/powerpc/platforms/85xx/common.c       |   10 ++++
 arch/powerpc/platforms/85xx/corenet_ds.c   |   34 ++-----------
 arch/powerpc/platforms/85xx/ge_imp3a.c     |   64 ++++++++-----------------
 arch/powerpc/platforms/85xx/mpc8536_ds.c   |   36 +-------------
 arch/powerpc/platforms/85xx/mpc85xx_ads.c  |   11 +---
 arch/powerpc/platforms/85xx/mpc85xx_cds.c  |   16 +-----
 arch/powerpc/platforms/85xx/mpc85xx_ds.c   |   13 ++---
 arch/powerpc/platforms/85xx/mpc85xx_mds.c  |   40 ++-------------
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c  |   30 +++++-------
 arch/powerpc/platforms/85xx/p1010rdb.c     |   14 +-----
 arch/powerpc/platforms/85xx/p1022_ds.c     |   36 ++------------
 arch/powerpc/platforms/85xx/p1022_rdk.c    |   36 ++------------
 arch/powerpc/platforms/85xx/p1023_rds.c    |    9 +--
 arch/powerpc/platforms/85xx/p2041_rdb.c    |    2 +-
 arch/powerpc/platforms/85xx/p3041_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p4080_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p5020_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p5040_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/qemu_e500.c    |    4 +-
 arch/powerpc/platforms/85xx/sbc8548.c      |   21 +-------
 arch/powerpc/platforms/85xx/socrates.c     |   11 +----
 arch/powerpc/platforms/85xx/stx_gp3.c      |   13 +----
 arch/powerpc/platforms/85xx/tqm85xx.c      |   21 +-------
 arch/powerpc/platforms/85xx/xes_mpc85xx.c  |   56 +++-------------------
 arch/powerpc/platforms/86xx/gef_ppc9a.c    |   12 ++---
 arch/powerpc/platforms/86xx/gef_sbc310.c   |   13 ++----
 arch/powerpc/platforms/86xx/gef_sbc610.c   |   12 ++---
 arch/powerpc/platforms/86xx/mpc8610_hpcd.c |   21 ++------
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c |   42 ++--------------
 arch/powerpc/platforms/86xx/sbc8641d.c     |   14 ++----
 arch/powerpc/sysdev/fsl_pci.c              |   71 +++++++++++++++++----------
 arch/powerpc/sysdev/fsl_pci.h              |   15 +++++-
 drivers/edac/mpc85xx_edac.c                |   43 +++++------------
 33 files changed, 203 insertions(+), 525 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 67dac22..d0861a0 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -27,6 +27,16 @@ static struct of_device_id __initdata mpc85xx_common_ids[] = {
 	{ .compatible = "fsl,mpc8548-guts", },
 	/* Probably unnecessary? */
 	{ .compatible = "gpio-leds", },
+	/* For all PCI controllers */
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p1010-pcie", },
+	{ .compatible = "fsl,p1023-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
 	{},
 };
 
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index 473d573..7c5d08a 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -16,7 +16,6 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -52,39 +51,16 @@ void __init corenet_ds_pic_init(void)
  */
 void __init corenet_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,p4080-pcie") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.2") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.3") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.4")) {
-			fsl_add_bridge(np, 0);
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PCI) && defined(CONFIG_PPC64)
 	pci_devs_phb_init();
 #endif
-#endif
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_assign_pci_primary();
+
+	swiotlb_detect_4g();
+
 	pr_info("%s board from Freescale Semiconductor\n", ppc_md.name);
 }
 
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index b6a728b..1eb0046 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -22,7 +22,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -84,53 +83,37 @@ void __init ge_imp3a_pic_init(void)
 	of_node_put(cascade_node);
 }
 
-#ifdef CONFIG_PCI
-static int primary_phb_addr;
-#endif	/* CONFIG_PCI */
-
-/*
- * Setup the architecture
- */
-static void __init ge_imp3a_setup_arch(void)
+void ge_imp3a_assign_pci_primary(void)
 {
-	struct device_node *regs;
-#ifdef CONFIG_PCI
 	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
-	if (ppc_md.progress)
-		ppc_md.progress("ge_imp3a_setup_arch()", 0);
+	struct resource rsrc;
 
-#ifdef CONFIG_PCI
 	for_each_node_by_type(np, "pci") {
 		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
 		    of_device_is_compatible(np, "fsl,mpc8548-pcie") ||
 		    of_device_is_compatible(np, "fsl,p2020-pcie")) {
-			struct resource rsrc;
 			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == primary_phb_addr)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
+			if ((rsrc.start & 0xfffff) == 0x9000)
+				fsl_pci_primary = np;
 		}
 	}
-#endif
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init ge_imp3a_setup_arch(void)
+{
+	struct device_node *regs;
+
+	if (ppc_md.progress)
+		ppc_md.progress("ge_imp3a_setup_arch()", 0);
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	ge_imp3a_assign_pci_primary();
+
+	swiotlb_detect_4g();
 
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs");
@@ -215,17 +198,10 @@ static int __init ge_imp3a_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "ge,IMP3A")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0x9000;
-#endif
-		return 1;
-	}
-
-	return 0;
+	return of_flat_dt_is_compatible(root, "ge,IMP3A");
 }
 
-machine_device_initcall(ge_imp3a, mpc85xx_common_publish_devices);
+machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 767c7cf..e79eb4c 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -17,7 +17,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -46,46 +45,17 @@ void __init mpc8536_ds_pic_init(void)
  */
 static void __init mpc8536_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc8536_ds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-
-#endif
+	fsl_assign_pci_primary();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	swiotlb_detect_4g();
 
 	printk("MPC8536 DS board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 29ee8fc..9be38b0 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -137,10 +137,6 @@ static void __init init_ioports(void)
 
 static void __init mpc85xx_ads_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_ads_setup_arch()", 0);
 
@@ -150,11 +146,10 @@ static void __init mpc85xx_ads_setup_arch(void)
 #endif
 
 #ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
+
+	fsl_assign_pci_primary();
 }
 
 static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
@@ -173,7 +168,7 @@ static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 11156fb..bae40b8 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -309,21 +309,11 @@ static void __init mpc85xx_cds_setup_arch(void)
 	}
 
 #ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-
 	ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup;
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
+
+	fsl_assign_pci_primary();
 }
 
 static void mpc85xx_cds_show_cpuinfo(struct seq_file *m)
@@ -355,7 +345,7 @@ static int __init mpc85xx_cds_probe(void)
         return of_flat_dt_is_compatible(root, "MPC85xxCDS");
 }
 
-machine_device_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
 
 define_machine(mpc85xx_cds) {
 	.name		= "MPC85xx CDS",
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 56f8c8f..52246a0 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -20,7 +20,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -129,12 +128,12 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
 }
 #endif	/* CONFIG_PCI */
 
-static void __init mpc85xx_ds_pci_init(void)
+static void __init mpc85xx_ds_assign_pci_primary(void)
 {
 #ifdef CONFIG_PCI
 	struct device_node *node;
 
-	fsl_pci_init();
+	fsl_assign_pci_primary();
 
 	/* See if we have a ULI under the primary */
 
@@ -160,7 +159,7 @@ static void __init mpc85xx_ds_setup_arch(void)
 		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
 
 	swiotlb_detect_4g();
-	mpc85xx_ds_pci_init();
+	mpc85xx_ds_assign_pci_primary();
 	mpc85xx_smp_init();
 
 	printk("MPC85xx DS board from Freescale Semiconductor\n");
@@ -176,9 +175,9 @@ static int __init mpc8544_ds_probe(void)
 	return !!of_flat_dt_is_compatible(root, "MPC8544DS");
 }
 
-machine_device_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
-machine_device_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
-machine_device_initcall(p2020_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 8e4b094..607dcaf 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -327,44 +327,16 @@ static void __init mpc85xx_mds_qeic_init(void) { }
 
 static void __init mpc85xx_mds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct pci_controller *hose;
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_mds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-#endif
-
 	mpc85xx_smp_init();
 
 	mpc85xx_mds_qe_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_assign_pci_primary();
+
+	swiotlb_detect_4g();
 }
 
 
@@ -409,9 +381,9 @@ static int __init mpc85xx_publish_devices(void)
 	return mpc85xx_common_publish_devices();
 }
 
-machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices);
-machine_device_initcall(mpc8569_mds, mpc85xx_publish_devices);
-machine_device_initcall(p1021_mds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
+machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
+machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index 1910fdc..8c1e867 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -86,23 +86,17 @@ void __init mpc85xx_rdb_pic_init(void)
  */
 static void __init mpc85xx_rdb_setup_arch(void)
 {
-#if defined(CONFIG_PCI) || defined(CONFIG_QUICC_ENGINE)
+#ifdef CONFIG_QUICC_ENGINE
 	struct device_node *np;
 #endif
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8548-pcie"))
-			fsl_add_bridge(np, 0);
-	}
-
-#endif
-
 	mpc85xx_smp_init();
 
+	fsl_assign_pci_primary();
+
 #ifdef CONFIG_QUICC_ENGINE
 	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
 	if (!np) {
@@ -161,15 +155,15 @@ qe_fail:
 	printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(p2020_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1025_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p1024_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index dbaf443..ace45ea 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -46,25 +46,15 @@ void __init p1010_rdb_pic_init(void)
  */
 static void __init p1010_rdb_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1010_rdb_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,p1010-pcie"))
-			fsl_add_bridge(np, 0);
-	}
-
-#endif
+	fsl_assign_pci_primary();
 
 	printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(p1010_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
 
 /*
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 3c732ac..8c8f399 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -18,7 +18,6 @@
 
 #include <linux/pci.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
@@ -507,32 +506,9 @@ early_param("video", early_video_setup);
  */
 static void __init p1022_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1022_ds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1022-pcie") {
-		struct resource rsrc;
-		struct pci_controller *hose;
-
-		of_address_to_resource(np, 0, &rsrc);
-
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-#endif
-
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.get_pixel_format	= p1022ds_get_pixel_format;
 	diu_ops.set_gamma_table		= p1022ds_set_gamma_table;
@@ -601,18 +577,14 @@ static void __init p1022_ds_setup_arch(void)
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_assign_pci_primary();
+
+	swiotlb_detect_4g();
 
 	pr_info("Freescale P1022 DS reference board\n");
 }
 
-machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index b3cf11b..0565ef7 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -14,7 +14,6 @@
 
 #include <linux/pci.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
@@ -121,32 +120,9 @@ void __init p1022_rdk_pic_init(void)
  */
 static void __init p1022_rdk_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1022_rdk_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1022-pcie") {
-		struct resource rsrc;
-		struct pci_controller *hose;
-
-		of_address_to_resource(np, 0, &rsrc);
-
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-#endif
-
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.set_monitor_port	= p1022rdk_set_monitor_port;
 	diu_ops.set_pixel_clock		= p1022rdk_set_pixel_clock;
@@ -155,18 +131,14 @@ static void __init p1022_rdk_setup_arch(void)
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_assign_pci_primary();
+
+	swiotlb_detect_4g();
 
 	pr_info("Freescale / iVeia P1022 RDK reference board\n");
 }
 
-machine_device_initcall(p1022_rdk, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c
index 2990e8b..fc96531 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rds.c
@@ -80,15 +80,12 @@ static void __init mpc85xx_rds_setup_arch(void)
 		}
 	}
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1023-pcie")
-		fsl_add_bridge(np, 0);
-#endif
-
 	mpc85xx_smp_init();
+
+	fsl_assign_pci_primary();
 }
 
-machine_device_initcall(p1023_rds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1023_rds, mpc85xx_common_publish_devices);
 
 static void __init mpc85xx_rds_pic_init(void)
 {
diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c
index 6541fa2..000c089 100644
--- a/arch/powerpc/platforms/85xx/p2041_rdb.c
+++ b/arch/powerpc/platforms/85xx/p2041_rdb.c
@@ -80,7 +80,7 @@ define_machine(p2041_rdb) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p2041_rdb, corenet_ds_publish_devices);
+machine_arch_initcall(p2041_rdb, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p2041_rdb, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c
index f238efa..b3edc20 100644
--- a/arch/powerpc/platforms/85xx/p3041_ds.c
+++ b/arch/powerpc/platforms/85xx/p3041_ds.c
@@ -82,7 +82,7 @@ define_machine(p3041_ds) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p3041_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p3041_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c
index c92417d..54df106 100644
--- a/arch/powerpc/platforms/85xx/p4080_ds.c
+++ b/arch/powerpc/platforms/85xx/p4080_ds.c
@@ -81,7 +81,7 @@ define_machine(p4080_ds) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p4080_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p4080_ds, corenet_ds_publish_devices);
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier);
 #endif
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index 17bef15..753a42c 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -91,7 +91,7 @@ define_machine(p5020_ds) {
 #endif
 };
 
-machine_device_initcall(p5020_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p5020_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
index 8e22a34..1138185 100644
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ b/arch/powerpc/platforms/85xx/p5040_ds.c
@@ -82,7 +82,7 @@ define_machine(p5040_ds) {
 #endif
 };
 
-machine_device_initcall(p5040_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p5040_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 3c5490c..af30a0f 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -41,7 +41,7 @@ static void __init qemu_e500_setup_arch(void)
 {
 	ppc_md.progress("qemu_e500_setup_arch()", 0);
 
-	fsl_pci_init();
+	fsl_assign_pci_primary();
 	swiotlb_detect_4g();
 	mpc85xx_smp_init();
 }
@@ -56,7 +56,7 @@ static int __init qemu_e500_probe(void)
 	return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500");
 }
 
-machine_device_initcall(qemu_e500, mpc85xx_common_publish_devices);
+machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices);
 
 define_machine(qemu_e500) {
 	.name			= "QEMU e500",
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index cd3a66b..f69ffa7 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -88,26 +88,11 @@ static int __init sbc8548_hw_rev(void)
  */
 static void __init sbc8548_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("sbc8548_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-#endif
+	fsl_assign_pci_primary();
+
 	sbc_rev = sbc8548_hw_rev();
 }
 
@@ -128,7 +113,7 @@ static void sbc8548_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(sbc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index b9c6daa..dcc9bb3 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -66,20 +66,13 @@ static void __init socrates_pic_init(void)
  */
 static void __init socrates_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("socrates_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-#endif
+	fsl_assign_pci_primary();
 }
 
-machine_device_initcall(socrates, mpc85xx_common_publish_devices);
+machine_arch_initcall(socrates, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index e050800..84fec3e 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -60,21 +60,14 @@ static void __init stx_gp3_pic_init(void)
  */
 static void __init stx_gp3_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("stx_gp3_setup_arch()", 0);
 
+	fsl_assign_pci_primary();
+
 #ifdef CONFIG_CPM2
 	cpm2_reset();
 #endif
-
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-#endif
 }
 
 static void stx_gp3_show_cpuinfo(struct seq_file *m)
@@ -93,7 +86,7 @@ static void stx_gp3_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(stx_gp3, mpc85xx_common_publish_devices);
+machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 4d786c2..a2fc85d 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -59,10 +59,6 @@ static void __init tqm85xx_pic_init(void)
  */
 static void __init tqm85xx_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("tqm85xx_setup_arch()", 0);
 
@@ -70,20 +66,7 @@ static void __init tqm85xx_setup_arch(void)
 	cpm2_reset();
 #endif
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			if (!of_address_to_resource(np, 0, &rsrc)) {
-				if ((rsrc.start & 0xfffff) == 0x8000)
-					fsl_add_bridge(np, 1);
-				else
-					fsl_add_bridge(np, 0);
-			}
-		}
-	}
-#endif
+	fsl_assign_pci_primary();
 }
 
 static void tqm85xx_show_cpuinfo(struct seq_file *m)
@@ -123,7 +106,7 @@ static void __init tqm85xx_ti1520_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520,
 		tqm85xx_ti1520_fixup);
 
-machine_device_initcall(tqm85xx, mpc85xx_common_publish_devices);
+machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices);
 
 static const char *board[] __initdata = {
 	"tqc,tqm8540",
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 41c6875..b9ba456 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -111,18 +111,11 @@ static void xes_mpc85xx_fixups(void)
 	}
 }
 
-#ifdef CONFIG_PCI
-static int primary_phb_addr;
-#endif
-
 /*
  * Setup the architecture
  */
 static void __init xes_mpc85xx_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
 	struct device_node *root;
 	const char *model = "Unknown";
 
@@ -137,26 +130,14 @@ static void __init xes_mpc85xx_setup_arch(void)
 
 	xes_mpc85xx_fixups();
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == primary_phb_addr)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-#endif
-
 	mpc85xx_smp_init();
+
+	fsl_assign_pci_primary();
 }
 
-machine_device_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
-machine_device_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
-machine_device_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
@@ -165,42 +146,21 @@ static int __init xes_mpc8572_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8572")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0x8000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8572");
 }
 
 static int __init xes_mpc8548_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8548")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0xb000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8548");
 }
 
 static int __init xes_mpc8540_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8540")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0xb000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8540");
 }
 
 define_machine(xes_mpc8572) {
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index 1fca663..b5b24cb 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -73,13 +73,6 @@ static void __init gef_ppc9a_init_irq(void)
 static void __init gef_ppc9a_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
 
 	printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n");
 
@@ -87,6 +80,8 @@ static void __init gef_ppc9a_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
+	fsl_assign_pci_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs");
 	if (regs) {
@@ -221,6 +216,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -231,7 +227,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_ppc9a, declare_of_platform_devices);
+machine_arch_initcall(gef_ppc9a, declare_of_platform_devices);
 
 define_machine(gef_ppc9a) {
 	.name			= "GE PPC9A",
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 14e0e576..3c624b5 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -73,20 +73,14 @@ static void __init gef_sbc310_init_irq(void)
 static void __init gef_sbc310_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
-
 	printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n");
 
 #ifdef CONFIG_SMP
 	mpc86xx_smp_init();
 #endif
 
+	fsl_assign_pci_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
 	if (regs) {
@@ -209,6 +203,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -219,7 +214,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_sbc310, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc310, declare_of_platform_devices);
 
 define_machine(gef_sbc310) {
 	.name			= "GE SBC310",
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index 1638f43..dcb9fdc 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -73,13 +73,6 @@ static void __init gef_sbc610_init_irq(void)
 static void __init gef_sbc610_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
 
 	printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n");
 
@@ -87,6 +80,8 @@ static void __init gef_sbc610_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
+	fsl_assign_pci_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
 	if (regs) {
@@ -198,6 +193,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -208,7 +204,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_sbc610, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc610, declare_of_platform_devices);
 
 define_machine(gef_sbc610) {
 	.name			= "GE SBC610",
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 62cd3c5..db0454a 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -91,6 +91,9 @@ static struct of_device_id __initdata mpc8610_ids[] = {
 	{ .compatible = "simple-bus", },
 	/* So that the DMA channel nodes can be probed individually: */
 	{ .compatible = "fsl,eloplus-dma", },
+	/* PCI controllers */
+	{ .compatible = "fsl,mpc8610-pci", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{}
 };
 
@@ -107,7 +110,7 @@ static int __init mpc8610_declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
+machine_arch_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
@@ -278,25 +281,13 @@ mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port)
 static void __init mpc86xx_hpcd_setup_arch(void)
 {
 	struct resource r;
-	struct device_node *np;
 	unsigned char *pixis;
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8610-pci")
-		    || of_device_is_compatible(np, "fsl,mpc8641-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0xa000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-        }
-#endif
+	fsl_assign_pci_primary();
+
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.get_pixel_format	= mpc8610hpcd_get_pixel_format;
 	diu_ops.set_gamma_table		= mpc8610hpcd_set_gamma_table;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 817245b..2195047 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -19,7 +19,6 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -51,15 +50,8 @@ extern int uli_exclude_device(struct pci_controller *hose,
 static int mpc86xx_exclude_device(struct pci_controller *hose,
 				   u_char bus, u_char devfn)
 {
-	struct device_node* node;	
-	struct resource rsrc;
-
-	node = hose->dn;
-	of_address_to_resource(node, 0, &rsrc);
-
-	if ((rsrc.start & 0xfffff) == 0x8000) {
+	if (hose->dn == fsl_pci_primary)
 		return uli_exclude_device(hose, bus, devfn);
-	}
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -69,30 +61,11 @@ static int mpc86xx_exclude_device(struct pci_controller *hose,
 static void __init
 mpc86xx_hpcn_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0);
 
 #ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		struct resource rsrc;
-		of_address_to_resource(np, 0, &rsrc);
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-
 	ppc_md.pci_exclude_device = mpc86xx_exclude_device;
-
 #endif
 
 	printk("MPC86xx HPCN board from Freescale Semiconductor\n");
@@ -101,13 +74,9 @@ mpc86xx_hpcn_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_assign_pci_primary();
+
+	swiotlb_detect_4g();
 }
 
 
@@ -162,6 +131,7 @@ static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "fsl,srio", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -171,7 +141,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(mpc86xx_hpcn, declare_of_platform_devices);
+machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices);
 machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
 
 define_machine(mpc86xx_hpcn) {
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
index e7007d0..93e1453 100644
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ b/arch/powerpc/platforms/86xx/sbc8641d.c
@@ -38,23 +38,16 @@
 static void __init
 sbc8641_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("sbc8641_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie")
-		fsl_add_bridge(np, 0);
-#endif
-
 	printk("SBC8641 board from Wind River\n");
 
 #ifdef CONFIG_SMP
 	mpc86xx_smp_init();
 #endif
+
+	fsl_assign_pci_primary();
 }
 
 
@@ -102,6 +95,7 @@ mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -111,7 +105,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(sbc8641, declare_of_platform_devices);
+machine_arch_initcall(sbc8641, declare_of_platform_devices);
 
 define_machine(sbc8641) {
 	.name			= "SBC8641D",
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index da7a3d7..c080de2 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -826,12 +826,9 @@ static const struct of_device_id pci_ids[] = {
 
 struct device_node *fsl_pci_primary;
 
-void __devinit fsl_pci_init(void)
+void fsl_assign_pci_primary(void)
 {
-	int ret;
 	struct device_node *node;
-	struct pci_controller *hose;
-	dma_addr_t max = 0xffffffff;
 
 	/* Callers can specify the primary bus using other means. */
 	if (!fsl_pci_primary) {
@@ -842,38 +839,60 @@ void __devinit fsl_pci_init(void)
 			node = fsl_pci_primary;
 
 			if (of_match_node(pci_ids, node))
-				break;
+				return;
 		}
-	}
 
-	node = NULL;
-	for_each_node_by_type(node, "pci") {
-		if (of_match_node(pci_ids, node)) {
+		node = of_find_node_by_type(NULL, "pci");
+		if (of_match_node(pci_ids, node))
 			/*
 			 * If there's no PCI host bridge with ISA, arbitrarily
 			 * designate one as primary.  This can go away once
 			 * various bugs with primary-less systems are fixed.
 			 */
-			if (!fsl_pci_primary)
-				fsl_pci_primary = node;
-
-			ret = fsl_add_bridge(node, fsl_pci_primary == node);
-			if (ret == 0) {
-				hose = pci_find_hose_for_OF_device(node);
-				max = min(max, hose->dma_window_base_cur +
-						hose->dma_window_size);
-			}
-		}
+			fsl_pci_primary = node;
 	}
+}
+
+static int __devinit fsl_pci_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct device_node *node;
+	struct pci_controller *hose;
+
+	node = pdev->dev.of_node;
+	ret = fsl_add_bridge(node, fsl_pci_primary == node);
 
 #ifdef CONFIG_SWIOTLB
-	/*
-	 * if we couldn't map all of DRAM via the dma windows
-	 * we need SWIOTLB to handle buffers located outside of
-	 * dma capable memory region
-	 */
-	if (memblock_end_of_DRAM() - 1 > max)
-		ppc_swiotlb_enable = 1;
+	if (ret == 0) {
+		hose = pci_find_hose_for_OF_device(pdev->dev.of_node);
+
+		/*
+		 * if we couldn't map all of DRAM via the dma windows
+		 * we need SWIOTLB to handle buffers located outside of
+		 * dma capable memory region
+		 */
+		if (memblock_end_of_DRAM() - 1 > hose->dma_window_base_cur +
+				hose->dma_window_size)
+			ppc_swiotlb_enable = 1;
+	}
 #endif
+
+	mpc85xx_pci_err_probe(pdev);
+
+	return 0;
+}
+
+static struct platform_driver fsl_pci_driver = {
+	.driver = {
+		.name = "fsl-pci",
+		.of_match_table = pci_ids,
+	},
+	.probe = fsl_pci_probe,
+};
+
+static int __init fsl_pci_init(void)
+{
+	return platform_driver_register(&fsl_pci_driver);
 }
+arch_initcall(fsl_pci_init);
 #endif
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index baa0fd1..cd46273 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -95,10 +95,19 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose);
 
 extern struct device_node *fsl_pci_primary;
 
-#ifdef CONFIG_FSL_PCI
-void fsl_pci_init(void);
+#ifdef CONFIG_PCI
+void fsl_assign_pci_primary(void);
 #else
-static inline void fsl_pci_init(void) {}
+static inline void fsl_assign_primary(void) {}
+#endif
+
+#ifdef CONFIG_EDAC_MPC85XX
+int mpc85xx_pci_err_probe(struct platform_device *op);
+#else
+static inline int mpc85xx_pci_err_probe(struct platform_device *op)
+{
+	return -ENOTSUPP;
+}
 #endif
 
 #endif /* __POWERPC_FSL_PCI_H */
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 0e37462..e4b6113 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -200,7 +200,7 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
+int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
 {
 	struct edac_pci_ctl_info *pci;
 	struct mpc85xx_pci_pdata *pdata;
@@ -214,6 +214,16 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
 	if (!pci)
 		return -ENOMEM;
 
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
 	pdata = pci->pvt_info;
 	pdata->name = "mpc85xx_pci_err";
 	pdata->irq = NO_IRQ;
@@ -303,6 +313,7 @@ err:
 	devres_release_group(&op->dev, mpc85xx_pci_err_probe);
 	return res;
 }
+EXPORT_SYMBOL(mpc85xx_pci_err_probe);
 
 static int mpc85xx_pci_err_remove(struct platform_device *op)
 {
@@ -326,27 +337,6 @@ static int mpc85xx_pci_err_remove(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id mpc85xx_pci_err_of_match[] = {
-	{
-	 .compatible = "fsl,mpc8540-pcix",
-	 },
-	{
-	 .compatible = "fsl,mpc8540-pci",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match);
-
-static struct platform_driver mpc85xx_pci_err_driver = {
-	.probe = mpc85xx_pci_err_probe,
-	.remove = __devexit_p(mpc85xx_pci_err_remove),
-	.driver = {
-		.name = "mpc85xx_pci_err",
-		.owner = THIS_MODULE,
-		.of_match_table = mpc85xx_pci_err_of_match,
-	},
-};
-
 #endif				/* CONFIG_PCI */
 
 /**************************** L2 Err device ***************************/
@@ -1193,12 +1183,6 @@ static int __init mpc85xx_mc_init(void)
 	if (res)
 		printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
 
-#ifdef CONFIG_PCI
-	res = platform_driver_register(&mpc85xx_pci_err_driver);
-	if (res)
-		printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
-#endif
-
 #ifdef CONFIG_FSL_SOC_BOOKE
 	pvr = mfspr(SPRN_PVR);
 
@@ -1235,9 +1219,6 @@ static void __exit mpc85xx_mc_exit(void)
 		on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
 	}
 #endif
-#ifdef CONFIG_PCI
-	platform_driver_unregister(&mpc85xx_pci_err_driver);
-#endif
 	platform_driver_unregister(&mpc85xx_l2_err_driver);
 	platform_driver_unregister(&mpc85xx_mc_err_driver);
 }
-- 
1.7.5.1

^ permalink raw reply related

* [PATCH V8] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Jia Hongtao @ 2012-08-20 10:06 UTC (permalink / raw)
  To: linuxppc-dev, galak; +Cc: B07421, b38951

We unified the Freescale pci/pcie initialization by changing the fsl_pci
to a platform driver. In previous PCI code architecture the initialization
routine is called at board_setup_arch stage. Now the initialization is done
in probe function which is architectural better. Also It's convenient for
adding PM support for PCI controller in later patch.

Now we registered pci controllers as platform devices. So we combine two
initialization code as one platform driver.

Signed-off-by: Jia Hongtao <B38951@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
---
Changes for V8:
* Use previous primary determination. Based on the point that there are
  bugs on primary-less system.
* Add exceptional support on ge_imp3a in which the primary bus is not the
  first pci bus detected.

 arch/powerpc/platforms/85xx/common.c       |   10 ++++
 arch/powerpc/platforms/85xx/corenet_ds.c   |   34 ++-----------
 arch/powerpc/platforms/85xx/ge_imp3a.c     |   64 ++++++++-----------------
 arch/powerpc/platforms/85xx/mpc8536_ds.c   |   36 +-------------
 arch/powerpc/platforms/85xx/mpc85xx_ads.c  |   11 +---
 arch/powerpc/platforms/85xx/mpc85xx_cds.c  |   16 +-----
 arch/powerpc/platforms/85xx/mpc85xx_ds.c   |   14 ++---
 arch/powerpc/platforms/85xx/mpc85xx_mds.c  |   40 ++-------------
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c  |   30 +++++-------
 arch/powerpc/platforms/85xx/p1010rdb.c     |   14 +-----
 arch/powerpc/platforms/85xx/p1022_ds.c     |   36 ++------------
 arch/powerpc/platforms/85xx/p1022_rdk.c    |   36 ++------------
 arch/powerpc/platforms/85xx/p1023_rds.c    |    9 +--
 arch/powerpc/platforms/85xx/p2041_rdb.c    |    2 +-
 arch/powerpc/platforms/85xx/p3041_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p4080_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p5020_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/p5040_ds.c     |    2 +-
 arch/powerpc/platforms/85xx/qemu_e500.c    |    4 +-
 arch/powerpc/platforms/85xx/sbc8548.c      |   21 +-------
 arch/powerpc/platforms/85xx/socrates.c     |   11 +----
 arch/powerpc/platforms/85xx/stx_gp3.c      |   13 +----
 arch/powerpc/platforms/85xx/tqm85xx.c      |   21 +-------
 arch/powerpc/platforms/85xx/xes_mpc85xx.c  |   56 +++-------------------
 arch/powerpc/platforms/86xx/gef_ppc9a.c    |   12 ++---
 arch/powerpc/platforms/86xx/gef_sbc310.c   |   13 ++----
 arch/powerpc/platforms/86xx/gef_sbc610.c   |   12 ++---
 arch/powerpc/platforms/86xx/mpc8610_hpcd.c |   21 ++------
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c |   42 ++--------------
 arch/powerpc/platforms/86xx/sbc8641d.c     |   14 ++----
 arch/powerpc/sysdev/fsl_pci.c              |   71 +++++++++++++++++----------
 arch/powerpc/sysdev/fsl_pci.h              |   15 +++++-
 drivers/edac/mpc85xx_edac.c                |   43 +++++------------
 33 files changed, 203 insertions(+), 526 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 67dac22..d0861a0 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -27,6 +27,16 @@ static struct of_device_id __initdata mpc85xx_common_ids[] = {
 	{ .compatible = "fsl,mpc8548-guts", },
 	/* Probably unnecessary? */
 	{ .compatible = "gpio-leds", },
+	/* For all PCI controllers */
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p1010-pcie", },
+	{ .compatible = "fsl,p1023-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
 	{},
 };
 
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index 473d573..ed69c92 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -16,7 +16,6 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -52,39 +51,16 @@ void __init corenet_ds_pic_init(void)
  */
 void __init corenet_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,p4080-pcie") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.2") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.3") ||
-		    of_device_is_compatible(np, "fsl,qoriq-pcie-v2.4")) {
-			fsl_add_bridge(np, 0);
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PCI) && defined(CONFIG_PPC64)
 	pci_devs_phb_init();
 #endif
-#endif
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
 	pr_info("%s board from Freescale Semiconductor\n", ppc_md.name);
 }
 
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index b6a728b..1eb0046 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -22,7 +22,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -84,53 +83,37 @@ void __init ge_imp3a_pic_init(void)
 	of_node_put(cascade_node);
 }
 
-#ifdef CONFIG_PCI
-static int primary_phb_addr;
-#endif	/* CONFIG_PCI */
-
-/*
- * Setup the architecture
- */
-static void __init ge_imp3a_setup_arch(void)
+void ge_imp3a_assign_pci_primary(void)
 {
-	struct device_node *regs;
-#ifdef CONFIG_PCI
 	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
-	if (ppc_md.progress)
-		ppc_md.progress("ge_imp3a_setup_arch()", 0);
+	struct resource rsrc;
 
-#ifdef CONFIG_PCI
 	for_each_node_by_type(np, "pci") {
 		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
 		    of_device_is_compatible(np, "fsl,mpc8548-pcie") ||
 		    of_device_is_compatible(np, "fsl,p2020-pcie")) {
-			struct resource rsrc;
 			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == primary_phb_addr)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
+			if ((rsrc.start & 0xfffff) == 0x9000)
+				fsl_pci_primary = np;
 		}
 	}
-#endif
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init ge_imp3a_setup_arch(void)
+{
+	struct device_node *regs;
+
+	if (ppc_md.progress)
+		ppc_md.progress("ge_imp3a_setup_arch()", 0);
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	ge_imp3a_assign_pci_primary();
+
+	swiotlb_detect_4g();
 
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs");
@@ -215,17 +198,10 @@ static int __init ge_imp3a_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "ge,IMP3A")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0x9000;
-#endif
-		return 1;
-	}
-
-	return 0;
+	return of_flat_dt_is_compatible(root, "ge,IMP3A");
 }
 
-machine_device_initcall(ge_imp3a, mpc85xx_common_publish_devices);
+machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 767c7cf..15ce4b5 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -17,7 +17,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -46,46 +45,17 @@ void __init mpc8536_ds_pic_init(void)
  */
 static void __init mpc8536_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc8536_ds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-
-#endif
+	fsl_pci_assign_primary();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	swiotlb_detect_4g();
 
 	printk("MPC8536 DS board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 29ee8fc..7d12a19 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -137,10 +137,6 @@ static void __init init_ioports(void)
 
 static void __init mpc85xx_ads_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_ads_setup_arch()", 0);
 
@@ -150,11 +146,10 @@ static void __init mpc85xx_ads_setup_arch(void)
 #endif
 
 #ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
+
+	fsl_pci_assign_primary();
 }
 
 static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
@@ -173,7 +168,7 @@ static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 11156fb..48cb82a 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -309,21 +309,11 @@ static void __init mpc85xx_cds_setup_arch(void)
 	}
 
 #ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-
 	ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup;
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
+
+	fsl_pci_assign_primary();
 }
 
 static void mpc85xx_cds_show_cpuinfo(struct seq_file *m)
@@ -355,7 +345,7 @@ static int __init mpc85xx_cds_probe(void)
         return of_flat_dt_is_compatible(root, "MPC85xxCDS");
 }
 
-machine_device_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
 
 define_machine(mpc85xx_cds) {
 	.name		= "MPC85xx CDS",
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 56f8c8f..9ebb91e 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -20,7 +20,6 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -129,13 +128,11 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
 }
 #endif	/* CONFIG_PCI */
 
-static void __init mpc85xx_ds_pci_init(void)
+static void __init mpc85xx_ds_uli_init(void)
 {
 #ifdef CONFIG_PCI
 	struct device_node *node;
 
-	fsl_pci_init();
-
 	/* See if we have a ULI under the primary */
 
 	node = of_find_node_by_name(NULL, "uli1575");
@@ -160,7 +157,8 @@ static void __init mpc85xx_ds_setup_arch(void)
 		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
 
 	swiotlb_detect_4g();
-	mpc85xx_ds_pci_init();
+	fsl_pci_assign_primary();
+	mpc85xx_ds_uli_init();
 	mpc85xx_smp_init();
 
 	printk("MPC85xx DS board from Freescale Semiconductor\n");
@@ -176,9 +174,9 @@ static int __init mpc8544_ds_probe(void)
 	return !!of_flat_dt_is_compatible(root, "MPC8544DS");
 }
 
-machine_device_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
-machine_device_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
-machine_device_initcall(p2020_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 8e4b094..8498f73 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -327,44 +327,16 @@ static void __init mpc85xx_mds_qeic_init(void) { }
 
 static void __init mpc85xx_mds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct pci_controller *hose;
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_mds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-
-			hose = pci_find_hose_for_OF_device(np);
-			max = min(max, hose->dma_window_base_cur +
-					hose->dma_window_size);
-		}
-	}
-#endif
-
 	mpc85xx_smp_init();
 
 	mpc85xx_mds_qe_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
 }
 
 
@@ -409,9 +381,9 @@ static int __init mpc85xx_publish_devices(void)
 	return mpc85xx_common_publish_devices();
 }
 
-machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices);
-machine_device_initcall(mpc8569_mds, mpc85xx_publish_devices);
-machine_device_initcall(p1021_mds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
+machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
+machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index 1910fdc..ede8771 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -86,23 +86,17 @@ void __init mpc85xx_rdb_pic_init(void)
  */
 static void __init mpc85xx_rdb_setup_arch(void)
 {
-#if defined(CONFIG_PCI) || defined(CONFIG_QUICC_ENGINE)
+#ifdef CONFIG_QUICC_ENGINE
 	struct device_node *np;
 #endif
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8548-pcie"))
-			fsl_add_bridge(np, 0);
-	}
-
-#endif
-
 	mpc85xx_smp_init();
 
+	fsl_pci_assign_primary();
+
 #ifdef CONFIG_QUICC_ENGINE
 	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
 	if (!np) {
@@ -161,15 +155,15 @@ qe_fail:
 	printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(p2020_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
-machine_device_initcall(p1025_rdb, mpc85xx_common_publish_devices);
-machine_device_initcall(p1024_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index dbaf443..0252961 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -46,25 +46,15 @@ void __init p1010_rdb_pic_init(void)
  */
 static void __init p1010_rdb_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1010_rdb_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,p1010-pcie"))
-			fsl_add_bridge(np, 0);
-	}
-
-#endif
+	fsl_pci_assign_primary();
 
 	printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(p1010_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
 
 /*
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 3c732ac..848a3e9 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -18,7 +18,6 @@
 
 #include <linux/pci.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
@@ -507,32 +506,9 @@ early_param("video", early_video_setup);
  */
 static void __init p1022_ds_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1022_ds_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1022-pcie") {
-		struct resource rsrc;
-		struct pci_controller *hose;
-
-		of_address_to_resource(np, 0, &rsrc);
-
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-#endif
-
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.get_pixel_format	= p1022ds_get_pixel_format;
 	diu_ops.set_gamma_table		= p1022ds_set_gamma_table;
@@ -601,18 +577,14 @@ static void __init p1022_ds_setup_arch(void)
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
 
 	pr_info("Freescale P1022 DS reference board\n");
 }
 
-machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index b3cf11b..55ffa1c 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -14,7 +14,6 @@
 
 #include <linux/pci.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 #include <asm/div64.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
@@ -121,32 +120,9 @@ void __init p1022_rdk_pic_init(void)
  */
 static void __init p1022_rdk_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("p1022_rdk_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1022-pcie") {
-		struct resource rsrc;
-		struct pci_controller *hose;
-
-		of_address_to_resource(np, 0, &rsrc);
-
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-#endif
-
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.set_monitor_port	= p1022rdk_set_monitor_port;
 	diu_ops.set_pixel_clock		= p1022rdk_set_pixel_clock;
@@ -155,18 +131,14 @@ static void __init p1022_rdk_setup_arch(void)
 
 	mpc85xx_smp_init();
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
 
 	pr_info("Freescale / iVeia P1022 RDK reference board\n");
 }
 
-machine_device_initcall(p1022_rdk, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c
index 2990e8b..9cc60a7 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rds.c
@@ -80,15 +80,12 @@ static void __init mpc85xx_rds_setup_arch(void)
 		}
 	}
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,p1023-pcie")
-		fsl_add_bridge(np, 0);
-#endif
-
 	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
 }
 
-machine_device_initcall(p1023_rds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1023_rds, mpc85xx_common_publish_devices);
 
 static void __init mpc85xx_rds_pic_init(void)
 {
diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c
index 6541fa2..000c089 100644
--- a/arch/powerpc/platforms/85xx/p2041_rdb.c
+++ b/arch/powerpc/platforms/85xx/p2041_rdb.c
@@ -80,7 +80,7 @@ define_machine(p2041_rdb) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p2041_rdb, corenet_ds_publish_devices);
+machine_arch_initcall(p2041_rdb, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p2041_rdb, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c
index f238efa..b3edc20 100644
--- a/arch/powerpc/platforms/85xx/p3041_ds.c
+++ b/arch/powerpc/platforms/85xx/p3041_ds.c
@@ -82,7 +82,7 @@ define_machine(p3041_ds) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p3041_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p3041_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c
index c92417d..54df106 100644
--- a/arch/powerpc/platforms/85xx/p4080_ds.c
+++ b/arch/powerpc/platforms/85xx/p4080_ds.c
@@ -81,7 +81,7 @@ define_machine(p4080_ds) {
 	.power_save		= e500_idle,
 };
 
-machine_device_initcall(p4080_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p4080_ds, corenet_ds_publish_devices);
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier);
 #endif
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index 17bef15..753a42c 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -91,7 +91,7 @@ define_machine(p5020_ds) {
 #endif
 };
 
-machine_device_initcall(p5020_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p5020_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
index 8e22a34..1138185 100644
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ b/arch/powerpc/platforms/85xx/p5040_ds.c
@@ -82,7 +82,7 @@ define_machine(p5040_ds) {
 #endif
 };
 
-machine_device_initcall(p5040_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p5040_ds, corenet_ds_publish_devices);
 
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 3c5490c..f6ea561 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -41,7 +41,7 @@ static void __init qemu_e500_setup_arch(void)
 {
 	ppc_md.progress("qemu_e500_setup_arch()", 0);
 
-	fsl_pci_init();
+	fsl_pci_assign_primary();
 	swiotlb_detect_4g();
 	mpc85xx_smp_init();
 }
@@ -56,7 +56,7 @@ static int __init qemu_e500_probe(void)
 	return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500");
 }
 
-machine_device_initcall(qemu_e500, mpc85xx_common_publish_devices);
+machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices);
 
 define_machine(qemu_e500) {
 	.name			= "QEMU e500",
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index cd3a66b..f621218 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -88,26 +88,11 @@ static int __init sbc8548_hw_rev(void)
  */
 static void __init sbc8548_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("sbc8548_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x8000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-#endif
+	fsl_pci_assign_primary();
+
 	sbc_rev = sbc8548_hw_rev();
 }
 
@@ -128,7 +113,7 @@ static void sbc8548_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(sbc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index b9c6daa..ae368e0 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -66,20 +66,13 @@ static void __init socrates_pic_init(void)
  */
 static void __init socrates_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("socrates_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-#endif
+	fsl_pci_assign_primary();
 }
 
-machine_device_initcall(socrates, mpc85xx_common_publish_devices);
+machine_arch_initcall(socrates, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index e050800..6f4939b 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -60,21 +60,14 @@ static void __init stx_gp3_pic_init(void)
  */
 static void __init stx_gp3_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("stx_gp3_setup_arch()", 0);
 
+	fsl_pci_assign_primary();
+
 #ifdef CONFIG_CPM2
 	cpm2_reset();
 #endif
-
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8540-pci")
-		fsl_add_bridge(np, 1);
-#endif
 }
 
 static void stx_gp3_show_cpuinfo(struct seq_file *m)
@@ -93,7 +86,7 @@ static void stx_gp3_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-machine_device_initcall(stx_gp3, mpc85xx_common_publish_devices);
+machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 4d786c2..942870f 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -59,10 +59,6 @@ static void __init tqm85xx_pic_init(void)
  */
 static void __init tqm85xx_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("tqm85xx_setup_arch()", 0);
 
@@ -70,20 +66,7 @@ static void __init tqm85xx_setup_arch(void)
 	cpm2_reset();
 #endif
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			if (!of_address_to_resource(np, 0, &rsrc)) {
-				if ((rsrc.start & 0xfffff) == 0x8000)
-					fsl_add_bridge(np, 1);
-				else
-					fsl_add_bridge(np, 0);
-			}
-		}
-	}
-#endif
+	fsl_pci_assign_primary();
 }
 
 static void tqm85xx_show_cpuinfo(struct seq_file *m)
@@ -123,7 +106,7 @@ static void __init tqm85xx_ti1520_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520,
 		tqm85xx_ti1520_fixup);
 
-machine_device_initcall(tqm85xx, mpc85xx_common_publish_devices);
+machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices);
 
 static const char *board[] __initdata = {
 	"tqc,tqm8540",
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 41c6875..dcbf7e4 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -111,18 +111,11 @@ static void xes_mpc85xx_fixups(void)
 	}
 }
 
-#ifdef CONFIG_PCI
-static int primary_phb_addr;
-#endif
-
 /*
  * Setup the architecture
  */
 static void __init xes_mpc85xx_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
 	struct device_node *root;
 	const char *model = "Unknown";
 
@@ -137,26 +130,14 @@ static void __init xes_mpc85xx_setup_arch(void)
 
 	xes_mpc85xx_fixups();
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == primary_phb_addr)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-	}
-#endif
-
 	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
 }
 
-machine_device_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
-machine_device_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
-machine_device_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
@@ -165,42 +146,21 @@ static int __init xes_mpc8572_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8572")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0x8000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8572");
 }
 
 static int __init xes_mpc8548_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8548")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0xb000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8548");
 }
 
 static int __init xes_mpc8540_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	if (of_flat_dt_is_compatible(root, "xes,MPC8540")) {
-#ifdef CONFIG_PCI
-		primary_phb_addr = 0xb000;
-#endif
-		return 1;
-	} else {
-		return 0;
-	}
+	return of_flat_dt_is_compatible(root, "xes,MPC8540");
 }
 
 define_machine(xes_mpc8572) {
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index 1fca663..58d753f 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -73,13 +73,6 @@ static void __init gef_ppc9a_init_irq(void)
 static void __init gef_ppc9a_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
 
 	printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n");
 
@@ -87,6 +80,8 @@ static void __init gef_ppc9a_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
+	fsl_pci_assign_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs");
 	if (regs) {
@@ -221,6 +216,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -231,7 +227,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_ppc9a, declare_of_platform_devices);
+machine_arch_initcall(gef_ppc9a, declare_of_platform_devices);
 
 define_machine(gef_ppc9a) {
 	.name			= "GE PPC9A",
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 14e0e576..67aaa81 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -73,20 +73,14 @@ static void __init gef_sbc310_init_irq(void)
 static void __init gef_sbc310_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
-
 	printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n");
 
 #ifdef CONFIG_SMP
 	mpc86xx_smp_init();
 #endif
 
+	fsl_pci_assign_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
 	if (regs) {
@@ -209,6 +203,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -219,7 +214,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_sbc310, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc310, declare_of_platform_devices);
 
 define_machine(gef_sbc310) {
 	.name			= "GE SBC310",
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index 1638f43..7666273 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -73,13 +73,6 @@ static void __init gef_sbc610_init_irq(void)
 static void __init gef_sbc610_setup_arch(void)
 {
 	struct device_node *regs;
-#ifdef CONFIG_PCI
-	struct device_node *np;
-
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		fsl_add_bridge(np, 1);
-	}
-#endif
 
 	printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n");
 
@@ -87,6 +80,8 @@ static void __init gef_sbc610_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
+	fsl_pci_assign_primary();
+
 	/* Remap basic board registers */
 	regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
 	if (regs) {
@@ -198,6 +193,7 @@ static long __init mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -208,7 +204,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(gef_sbc610, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc610, declare_of_platform_devices);
 
 define_machine(gef_sbc610) {
 	.name			= "GE SBC610",
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 62cd3c5..a817398 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -91,6 +91,9 @@ static struct of_device_id __initdata mpc8610_ids[] = {
 	{ .compatible = "simple-bus", },
 	/* So that the DMA channel nodes can be probed individually: */
 	{ .compatible = "fsl,eloplus-dma", },
+	/* PCI controllers */
+	{ .compatible = "fsl,mpc8610-pci", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{}
 };
 
@@ -107,7 +110,7 @@ static int __init mpc8610_declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
+machine_arch_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
@@ -278,25 +281,13 @@ mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port)
 static void __init mpc86xx_hpcd_setup_arch(void)
 {
 	struct resource r;
-	struct device_node *np;
 	unsigned char *pixis;
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_node_by_type(np, "pci") {
-		if (of_device_is_compatible(np, "fsl,mpc8610-pci")
-		    || of_device_is_compatible(np, "fsl,mpc8641-pcie")) {
-			struct resource rsrc;
-			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0xa000)
-				fsl_add_bridge(np, 1);
-			else
-				fsl_add_bridge(np, 0);
-		}
-        }
-#endif
+	fsl_pci_assign_primary();
+
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 	diu_ops.get_pixel_format	= mpc8610hpcd_get_pixel_format;
 	diu_ops.set_gamma_table		= mpc8610hpcd_set_gamma_table;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 817245b..e8bf3fa 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -19,7 +19,6 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
@@ -51,15 +50,8 @@ extern int uli_exclude_device(struct pci_controller *hose,
 static int mpc86xx_exclude_device(struct pci_controller *hose,
 				   u_char bus, u_char devfn)
 {
-	struct device_node* node;	
-	struct resource rsrc;
-
-	node = hose->dn;
-	of_address_to_resource(node, 0, &rsrc);
-
-	if ((rsrc.start & 0xfffff) == 0x8000) {
+	if (hose->dn == fsl_pci_primary)
 		return uli_exclude_device(hose, bus, devfn);
-	}
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -69,30 +61,11 @@ static int mpc86xx_exclude_device(struct pci_controller *hose,
 static void __init
 mpc86xx_hpcn_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-	struct pci_controller *hose;
-#endif
-	dma_addr_t max = 0xffffffff;
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0);
 
 #ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") {
-		struct resource rsrc;
-		of_address_to_resource(np, 0, &rsrc);
-		if ((rsrc.start & 0xfffff) == 0x8000)
-			fsl_add_bridge(np, 1);
-		else
-			fsl_add_bridge(np, 0);
-		hose = pci_find_hose_for_OF_device(np);
-		max = min(max, hose->dma_window_base_cur +
-			  hose->dma_window_size);
-	}
-
 	ppc_md.pci_exclude_device = mpc86xx_exclude_device;
-
 #endif
 
 	printk("MPC86xx HPCN board from Freescale Semiconductor\n");
@@ -101,13 +74,9 @@ mpc86xx_hpcn_setup_arch(void)
 	mpc86xx_smp_init();
 #endif
 
-#ifdef CONFIG_SWIOTLB
-	if ((memblock_end_of_DRAM() - 1) > max) {
-		ppc_swiotlb_enable = 1;
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	}
-#endif
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
 }
 
 
@@ -162,6 +131,7 @@ static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "fsl,srio", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -171,7 +141,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(mpc86xx_hpcn, declare_of_platform_devices);
+machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices);
 machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
 
 define_machine(mpc86xx_hpcn) {
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
index e7007d0..b47a8fd 100644
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ b/arch/powerpc/platforms/86xx/sbc8641d.c
@@ -38,23 +38,16 @@
 static void __init
 sbc8641_setup_arch(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("sbc8641_setup_arch()", 0);
 
-#ifdef CONFIG_PCI
-	for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie")
-		fsl_add_bridge(np, 0);
-#endif
-
 	printk("SBC8641 board from Wind River\n");
 
 #ifdef CONFIG_SMP
 	mpc86xx_smp_init();
 #endif
+
+	fsl_pci_assign_primary();
 }
 
 
@@ -102,6 +95,7 @@ mpc86xx_time_init(void)
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
 	{},
 };
 
@@ -111,7 +105,7 @@ static int __init declare_of_platform_devices(void)
 
 	return 0;
 }
-machine_device_initcall(sbc8641, declare_of_platform_devices);
+machine_arch_initcall(sbc8641, declare_of_platform_devices);
 
 define_machine(sbc8641) {
 	.name			= "SBC8641D",
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index da7a3d7..e2d33fa 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -826,12 +826,9 @@ static const struct of_device_id pci_ids[] = {
 
 struct device_node *fsl_pci_primary;
 
-void __devinit fsl_pci_init(void)
+void fsl_pci_assign_primary(void)
 {
-	int ret;
 	struct device_node *node;
-	struct pci_controller *hose;
-	dma_addr_t max = 0xffffffff;
 
 	/* Callers can specify the primary bus using other means. */
 	if (!fsl_pci_primary) {
@@ -842,38 +839,60 @@ void __devinit fsl_pci_init(void)
 			node = fsl_pci_primary;
 
 			if (of_match_node(pci_ids, node))
-				break;
+				return;
 		}
-	}
 
-	node = NULL;
-	for_each_node_by_type(node, "pci") {
-		if (of_match_node(pci_ids, node)) {
+		node = of_find_node_by_type(NULL, "pci");
+		if (of_match_node(pci_ids, node))
 			/*
 			 * If there's no PCI host bridge with ISA, arbitrarily
 			 * designate one as primary.  This can go away once
 			 * various bugs with primary-less systems are fixed.
 			 */
-			if (!fsl_pci_primary)
-				fsl_pci_primary = node;
-
-			ret = fsl_add_bridge(node, fsl_pci_primary == node);
-			if (ret == 0) {
-				hose = pci_find_hose_for_OF_device(node);
-				max = min(max, hose->dma_window_base_cur +
-						hose->dma_window_size);
-			}
-		}
+			fsl_pci_primary = node;
 	}
+}
+
+static int __devinit fsl_pci_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct device_node *node;
+	struct pci_controller *hose;
+
+	node = pdev->dev.of_node;
+	ret = fsl_add_bridge(node, fsl_pci_primary == node);
 
 #ifdef CONFIG_SWIOTLB
-	/*
-	 * if we couldn't map all of DRAM via the dma windows
-	 * we need SWIOTLB to handle buffers located outside of
-	 * dma capable memory region
-	 */
-	if (memblock_end_of_DRAM() - 1 > max)
-		ppc_swiotlb_enable = 1;
+	if (ret == 0) {
+		hose = pci_find_hose_for_OF_device(pdev->dev.of_node);
+
+		/*
+		 * if we couldn't map all of DRAM via the dma windows
+		 * we need SWIOTLB to handle buffers located outside of
+		 * dma capable memory region
+		 */
+		if (memblock_end_of_DRAM() - 1 > hose->dma_window_base_cur +
+				hose->dma_window_size)
+			ppc_swiotlb_enable = 1;
+	}
 #endif
+
+	mpc85xx_pci_err_probe(pdev);
+
+	return 0;
+}
+
+static struct platform_driver fsl_pci_driver = {
+	.driver = {
+		.name = "fsl-pci",
+		.of_match_table = pci_ids,
+	},
+	.probe = fsl_pci_probe,
+};
+
+static int __init fsl_pci_init(void)
+{
+	return platform_driver_register(&fsl_pci_driver);
 }
+arch_initcall(fsl_pci_init);
 #endif
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index baa0fd1..d5aa1d9 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -95,10 +95,19 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose);
 
 extern struct device_node *fsl_pci_primary;
 
-#ifdef CONFIG_FSL_PCI
-void fsl_pci_init(void);
+#ifdef CONFIG_PCI
+void fsl_pci_assign_primary(void);
 #else
-static inline void fsl_pci_init(void) {}
+static inline void fsl_pci_assign_primary(void) {}
+#endif
+
+#ifdef CONFIG_EDAC_MPC85XX
+int mpc85xx_pci_err_probe(struct platform_device *op);
+#else
+static inline int mpc85xx_pci_err_probe(struct platform_device *op)
+{
+	return -ENOTSUPP;
+}
 #endif
 
 #endif /* __POWERPC_FSL_PCI_H */
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 0e37462..e4b6113 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -200,7 +200,7 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
+int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
 {
 	struct edac_pci_ctl_info *pci;
 	struct mpc85xx_pci_pdata *pdata;
@@ -214,6 +214,16 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op)
 	if (!pci)
 		return -ENOMEM;
 
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
 	pdata = pci->pvt_info;
 	pdata->name = "mpc85xx_pci_err";
 	pdata->irq = NO_IRQ;
@@ -303,6 +313,7 @@ err:
 	devres_release_group(&op->dev, mpc85xx_pci_err_probe);
 	return res;
 }
+EXPORT_SYMBOL(mpc85xx_pci_err_probe);
 
 static int mpc85xx_pci_err_remove(struct platform_device *op)
 {
@@ -326,27 +337,6 @@ static int mpc85xx_pci_err_remove(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id mpc85xx_pci_err_of_match[] = {
-	{
-	 .compatible = "fsl,mpc8540-pcix",
-	 },
-	{
-	 .compatible = "fsl,mpc8540-pci",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match);
-
-static struct platform_driver mpc85xx_pci_err_driver = {
-	.probe = mpc85xx_pci_err_probe,
-	.remove = __devexit_p(mpc85xx_pci_err_remove),
-	.driver = {
-		.name = "mpc85xx_pci_err",
-		.owner = THIS_MODULE,
-		.of_match_table = mpc85xx_pci_err_of_match,
-	},
-};
-
 #endif				/* CONFIG_PCI */
 
 /**************************** L2 Err device ***************************/
@@ -1193,12 +1183,6 @@ static int __init mpc85xx_mc_init(void)
 	if (res)
 		printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
 
-#ifdef CONFIG_PCI
-	res = platform_driver_register(&mpc85xx_pci_err_driver);
-	if (res)
-		printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
-#endif
-
 #ifdef CONFIG_FSL_SOC_BOOKE
 	pvr = mfspr(SPRN_PVR);
 
@@ -1235,9 +1219,6 @@ static void __exit mpc85xx_mc_exit(void)
 		on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
 	}
 #endif
-#ifdef CONFIG_PCI
-	platform_driver_unregister(&mpc85xx_pci_err_driver);
-#endif
 	platform_driver_unregister(&mpc85xx_l2_err_driver);
 	platform_driver_unregister(&mpc85xx_mc_err_driver);
 }
-- 
1.7.5.1

^ permalink raw reply related

* myri10ge in p4080ds ppc
From: leonid @ 2012-08-20 13:06 UTC (permalink / raw)
  To: Linuxppc-dev

Hi
This is my first mail in this group , so I hope this is the right place 
for it.

I am testing myri10ge pci-e (10G ethernet card) on p4080(freescale 
powerpc) evaluation board.
using kernel 2.6.34.6
when I am trying to insert myri10ge kernel module, I getting :
"myri10ge: Version 1.5.2-1.459
myri10ge 0001:03:00.0: invalid sram_size -33B or board span 16777216B "

it is looks like sram_size value is garbage read from the device .

any idea ?

reference:
myri10ge - https://www.myricom.com
p4080ds - 
http://www.freescale.com/webapp/sps/site/prod_summary.jsp?code=P4080DS
-- 
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

^ permalink raw reply

* [PATCH 6/8] ppc/pnv: fix overrunning segment tracing array
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

There're 2 arrays introduced to trace which PE has occupied the
corresponding resource (I/O or MMIO) segment. However, we didn't
allocate enough memory for them and that possiblly leads to PE
descriptor corruption.

The patch fixes that by allocating enough memory for those 2 arrays.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index adaef11..c856f90 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1411,9 +1411,9 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	/* Allocate aux data & arrays */
 	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
 	m32map_off = size;
-	size += phb->ioda.total_pe;
+	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
 	iomap_off = size;
-	size += phb->ioda.total_pe;
+	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
 	pemap_off = size;
 	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
 	aux = alloc_bootmem(size);
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH V4 0/8] Rework on PowerNV P7IOC initialization
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan

The rework is done based on Ben's initial ideas on how PE and resource
assignment is done on top of PCI core instead of doing resource assignment
by powernv platform. With the series of patches, the following aspects will
be covered:

	- Only create PE based on PCI bus. Basically, there will be 2 types
	  of PEs built on PCI bus: (A) PE only includes single PCI bus. (B)
	  PE includes the PCI bus as well as its subordinate PCI buses.
	- Make the I/O and MMIO resources of P2P bridge aligned according to
	  the PHB's resource layout. For example, MMIO resource of P2P bridges
	  will be aligned with 16MB if PHB has 16MB M32 segment.
	- The resource allocation will be done on basis of PE.
	- Trivial fixup on the hook of enabling PCI device, PE check etc.

The series of patches has been verified on Firebird-L based on kernel 3.6.RC2. During
the kernel boots, USB HUBs can be detected and initialized correctly. Also, the disk
drive as well as network interfaces can work successfully after booting into shell
environment.

V1 -> V2
	- Add separate field into "struct pci_host_bridge" to trace the minimal
	  alignment of prefetchable MMIO bars for P2P bridges (Benjamin Herrenschmidt)
	- Reduced variable names while measuring the MMIO bars (Benjamin Herrenschmidt)
	- Change the commit log for more clear description on [PATCH 2/8] (Benjamin Herrenschmidt)
	- Change the commit log for more clear description on the types of PCI bus
	  sensitive PEs on [PATCH 5/8] (Richard Yang)

V2 -> V3
	- Rework the sequence of the patches so that it won't break "git bitsect" (Benjamin Herrenschmidt)
	- Cleanup on variables name so that they have shorter length (Benjamin Herrenschmidt)
	- Rework on PE's resource calculation based on the PCI bus (Benjamin Herrenschmidt)
	- Cleanup on changelog (Benjamin Herrenschmidt)

V3 -> V4
	- Rebase to 3.6.RC2
	- The series of patches has been reworked to comply with the generic part
	  "[PATCH v8 0/8] Minimal alignment for p2p bars"	
	- Remove the duplicate assignment for PE number (Richard Yang)

-----

arch/powerpc/platforms/powernv/pci-ioda.c |  694 ++++++++---------------------
arch/powerpc/platforms/powernv/pci.h      |   21 +-
2 files changed, 195 insertions(+), 520 deletions(-)

Thanks,
Gavin

^ permalink raw reply

* [PATCH 2/8] ppc/pnv: PE list based on creation order
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

The resource (I/O and MMIO) will be assigned on basis of PE from
top to bottom so that we can implement the trick here: the resource
that has been assigned to parent PE could be taken by child PE if
necessary.

The current implementation already has PE list per PHB basis, but
the list doesn't meet our requirment: tracing PE based on their
cration time from top to bottom. So the patch does rename for the
DMA based PE list and introduces the list to trace the PEs sequentially
based on their creation time.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   12 ++++++++----
 arch/powerpc/platforms/powernv/pci.h      |   10 ++++++++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 138de18..61b0571 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -657,13 +657,13 @@ static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
 {
 	struct pnv_ioda_pe *lpe;
 
-	list_for_each_entry(lpe, &phb->ioda.pe_list, link) {
+	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
 		if (lpe->dma_weight < pe->dma_weight) {
-			list_add_tail(&pe->link, &lpe->link);
+			list_add_tail(&pe->dma_link, &lpe->dma_link);
 			return;
 		}
 	}
-	list_add_tail(&pe->link, &phb->ioda.pe_list);
+	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
 }
 
 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
@@ -829,6 +829,9 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 	/* Associate it with all child devices */
 	pnv_ioda_setup_same_PE(bus, pe);
 
+	/* Put PE to the list */
+	list_add_tail(&pe->list, &phb->ioda.pe_list);
+
 	/* Account for one DMA PE if at least one DMA capable device exist
 	 * below the bridge
 	 */
@@ -1012,7 +1015,7 @@ static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
 	remaining = phb->ioda.tce32_count;
 	tw = phb->ioda.dma_weight;
 	base = 0;
-	list_for_each_entry(pe, &phb->ioda.pe_list, link) {
+	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
 		if (!pe->dma_weight)
 			continue;
 		if (!remaining) {
@@ -1305,6 +1308,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	phb->ioda.pe_array = aux + pemap_off;
 	set_bit(0, phb->ioda.pe_alloc);
 
+	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
 	INIT_LIST_HEAD(&phb->ioda.pe_list);
 
 	/* Calculate how many 32-bit TCE segments we have */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 0cb760c..b70720b 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -59,7 +59,8 @@ struct pnv_ioda_pe {
 	int			mve_number;
 
 	/* Link in list of PE#s */
-	struct list_head	link;
+	struct list_head	dma_link;
+	struct list_head	list;
 };
 
 struct pnv_phb {
@@ -107,6 +108,11 @@ struct pnv_phb {
 			unsigned int		*io_segmap;
 			struct pnv_ioda_pe	*pe_array;
 
+			/* Sorted list of used PE's based
+			 * on the sequence of creation
+			 */
+			struct list_head	pe_list;
+
 			/* Reverse map of PEs, will have to extend if
 			 * we are to support more than 256 PEs, indexed
 			 * bus { bus, devfn }
@@ -125,7 +131,7 @@ struct pnv_phb {
 			/* Sorted list of used PE's, sorted at
 			 * boot for resource allocation purposes
 			 */
-			struct list_head	pe_list;
+			struct list_head	pe_dma_list;
 		} ioda;
 	};
 
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 4/8] ppc/pnv: initialize DMA for PEs
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch introduces additional wrapper function to call the original
implementation so that the DMA can be configured for all existing PEs.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 96cdc61..09deab2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1242,10 +1242,20 @@ static void __devinit pnv_pci_ioda_setup_seg(void)
 	}
 }
 
+static void __devinit pnv_pci_ioda_setup_DMA(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		pnv_ioda_setup_dma(hose->private_data);
+	}
+}
+
 static void __devinit pnv_pci_ioda_fixup(void)
 {
 	pnv_pci_ioda_setup_PEs();
 	pnv_pci_ioda_setup_seg();
+	pnv_pci_ioda_setup_DMA();
 }
 
 /*
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 8/8] ppc/pnv: remove unused functions
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

We don't need them anymore. The patch removes those functions.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |  441 -----------------------------
 1 files changed, 0 insertions(+), 441 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index ead4eff..5a151ac 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -35,14 +35,6 @@
 #include "powernv.h"
 #include "pci.h"
 
-struct resource_wrap {
-	struct list_head	link;
-	resource_size_t		size;
-	resource_size_t		align;
-	struct pci_dev		*dev;	/* Set if it's a device */
-	struct pci_bus		*bus;	/* Set if it's a bridge */
-};
-
 static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
 		       struct va_format *vaf)
 {
@@ -78,273 +70,6 @@ define_pe_printk_level(pe_err, KERN_ERR);
 define_pe_printk_level(pe_warn, KERN_WARNING);
 define_pe_printk_level(pe_info, KERN_INFO);
 
-
-/* Calculate resource usage & alignment requirement of a single
- * device. This will also assign all resources within the device
- * for a given type starting at 0 for the biggest one and then
- * assigning in decreasing order of size.
- */
-static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags,
-					resource_size_t *size,
-					resource_size_t *align)
-{
-	resource_size_t start;
-	struct resource *r;
-	int i;
-
-	pr_devel("  -> CDR %s\n", pci_name(dev));
-
-	*size = *align = 0;
-
-	/* Clear the resources out and mark them all unset */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		r = &dev->resource[i];
-		if (!(r->flags & flags))
-		    continue;
-		if (r->start) {
-			r->end -= r->start;
-			r->start = 0;
-		}
-		r->flags |= IORESOURCE_UNSET;
-	}
-
-	/* We currently keep all memory resources together, we
-	 * will handle prefetch & 64-bit separately in the future
-	 * but for now we stick everybody in M32
-	 */
-	start = 0;
-	for (;;) {
-		resource_size_t max_size = 0;
-		int max_no = -1;
-
-		/* Find next biggest resource */
-		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-			r = &dev->resource[i];
-			if (!(r->flags & IORESOURCE_UNSET) ||
-			    !(r->flags & flags))
-				continue;
-			if (resource_size(r) > max_size) {
-				max_size = resource_size(r);
-				max_no = i;
-			}
-		}
-		if (max_no < 0)
-			break;
-		r = &dev->resource[max_no];
-		if (max_size > *align)
-			*align = max_size;
-		*size += max_size;
-		r->start = start;
-		start += max_size;
-		r->end = r->start + max_size - 1;
-		r->flags &= ~IORESOURCE_UNSET;
-		pr_devel("  ->     R%d %016llx..%016llx\n",
-			 max_no, r->start, r->end);
-	}
-	pr_devel("  <- CDR %s size=%llx align=%llx\n",
-		 pci_name(dev), *size, *align);
-}
-
-/* Allocate a resource "wrap" for a given device or bridge and
- * insert it at the right position in the sorted list
- */
-static void __devinit pnv_ioda_add_wrap(struct list_head *list,
-					struct pci_bus *bus,
-					struct pci_dev *dev,
-					resource_size_t size,
-					resource_size_t align)
-{
-	struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL);
-
-	w->size = size;
-	w->align = align;
-	w->dev = dev;
-	w->bus = bus;
-
-	list_for_each_entry(w1, list, link) {
-		if (w1->align < align) {
-			list_add_tail(&w->link, &w1->link);
-			return;
-		}
-	}
-	list_add_tail(&w->link, list);
-}
-
-/* Offset device resources of a given type */
-static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev,
-					  unsigned int flags,
-					  resource_size_t offset)
-{
-	struct resource *r;
-	int i;
-
-	pr_devel("  -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
-
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		r = &dev->resource[i];
-		if (r->flags & flags) {
-			dev->resource[i].start += offset;
-			dev->resource[i].end += offset;
-		}
-	}
-
-	pr_devel("  <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
-}
-
-/* Offset bus resources (& all children) of a given type */
-static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus,
-					  unsigned int flags,
-					  resource_size_t offset)
-{
-	struct resource *r;
-	struct pci_dev *dev;
-	struct pci_bus *cbus;
-	int i;
-
-	pr_devel("  -> OBR %s [%x] +%016llx\n",
-		 bus->self ? pci_name(bus->self) : "root", flags, offset);
-
-	pci_bus_for_each_resource(bus, r, i) {
-		if (r && (r->flags & flags)) {
-			r->start += offset;
-			r->end += offset;
-		}
-	}
-	list_for_each_entry(dev, &bus->devices, bus_list)
-		pnv_ioda_offset_dev(dev, flags, offset);
-	list_for_each_entry(cbus, &bus->children, node)
-		pnv_ioda_offset_bus(cbus, flags, offset);
-
-	pr_devel("  <- OBR %s [%x]\n",
-		 bus->self ? pci_name(bus->self) : "root", flags);
-}
-
-/* This is the guts of our IODA resource allocation. This is called
- * recursively for each bus in the system. It calculates all the
- * necessary size and requirements for children and assign them
- * resources such that:
- *
- *   - Each function fits in it's own contiguous set of IO/M32
- *     segment
- *
- *   - All segments behind a P2P bridge are contiguous and obey
- *     alignment constraints of those bridges
- */
-static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags,
-					resource_size_t *size,
-					resource_size_t *align)
-{
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
-	resource_size_t dev_size, dev_align, start;
-	resource_size_t min_align, min_balign;
-	struct pci_dev *cdev;
-	struct pci_bus *cbus;
-	struct list_head head;
-	struct resource_wrap *w;
-	unsigned int bres;
-
-	*size = *align = 0;
-
-	pr_devel("-> CBR %s [%x]\n",
-		 bus->self ? pci_name(bus->self) : "root", flags);
-
-	/* Calculate alignment requirements based on the type
-	 * of resource we are working on
-	 */
-	if (flags & IORESOURCE_IO) {
-		bres = 0;
-		min_align = phb->ioda.io_segsize;
-		min_balign = 0x1000;
-	} else {
-		bres = 1;
-		min_align = phb->ioda.m32_segsize;
-		min_balign = 0x100000;
-	}
-
-	/* Gather all our children resources ordered by alignment */
-	INIT_LIST_HEAD(&head);
-
-	/*   - Busses */
-	list_for_each_entry(cbus, &bus->children, node) {
-		pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align);
-		pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align);
-	}
-
-	/*   - Devices */
-	list_for_each_entry(cdev, &bus->devices, bus_list) {
-		pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align);
-		/* Align them to segment size */
-		if (dev_align < min_align)
-			dev_align = min_align;
-		pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align);
-	}
-	if (list_empty(&head))
-		goto empty;
-
-	/* Now we can do two things: assign offsets to them within that
-	 * level and get our total alignment & size requirements. The
-	 * assignment algorithm is going to be uber-trivial for now, we
-	 * can try to be smarter later at filling out holes.
-	 */
-	if (bus->self) {
-		/* No offset for downstream bridges */
-		start = 0;
-	} else {
-		/* Offset from the root */
-		if (flags & IORESOURCE_IO)
-			/* Don't hand out IO 0 */
-			start = hose->io_resource.start + 0x1000;
-		else
-			start = hose->mem_resources[0].start;
-	}
-	while(!list_empty(&head)) {
-		w = list_first_entry(&head, struct resource_wrap, link);
-		list_del(&w->link);
-		if (w->size) {
-			if (start) {
-				start = ALIGN(start, w->align);
-				if (w->dev)
-					pnv_ioda_offset_dev(w->dev,flags,start);
-				else if (w->bus)
-					pnv_ioda_offset_bus(w->bus,flags,start);
-			}
-			if (w->align > *align)
-				*align = w->align;
-		}
-		start += w->size;
-		kfree(w);
-	}
-	*size = start;
-
-	/* Align and setup bridge resources */
-	*align = max_t(resource_size_t, *align,
-		       max_t(resource_size_t, min_align, min_balign));
-	*size = ALIGN(*size,
-		      max_t(resource_size_t, min_align, min_balign));
- empty:
-	/* Only setup P2P's, not the PHB itself */
-	if (bus->self) {
-		struct resource *res = bus->resource[bres];
-
-		if (WARN_ON(res == NULL))
-			return;
-
-		/*
-		 * FIXME: We should probably export and call
-		 * pci_bridge_check_ranges() to properly re-initialize
-		 * the PCI portion of the flags here, and to detect
-		 * what the bridge actually supports.
-		 */
-		res->start = 0;
-		res->flags = (*size) ? flags : 0;
-		res->end = (*size) ? (*size - 1) : 0;
-	}
-
-	pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
-		 bus->self ? pci_name(bus->self) : "root", flags,*size,*align);
-}
-
 static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
 {
 	struct device_node *np;
@@ -355,172 +80,6 @@ static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
 	return PCI_DN(np);
 }
 
-static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev)
-{
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
-	unsigned int pe, i;
-	resource_size_t pos;
-	struct resource io_res;
-	struct resource m32_res;
-	struct pci_bus_region region;
-	int rc;
-
-	/* Anything not referenced in the device-tree gets PE#0 */
-	pe = pdn ? pdn->pe_number : 0;
-
-	/* Calculate the device min/max */
-	io_res.start = m32_res.start = (resource_size_t)-1;
-	io_res.end = m32_res.end = 0;
-	io_res.flags = IORESOURCE_IO;
-	m32_res.flags = IORESOURCE_MEM;
-
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		struct resource *r = NULL;
-		if (dev->resource[i].flags & IORESOURCE_IO)
-			r = &io_res;
-		if (dev->resource[i].flags & IORESOURCE_MEM)
-			r = &m32_res;
-		if (!r)
-			continue;
-		if (dev->resource[i].start < r->start)
-			r->start = dev->resource[i].start;
-		if (dev->resource[i].end > r->end)
-			r->end = dev->resource[i].end;
-	}
-
-	/* Setup IO segments */
-	if (io_res.start < io_res.end) {
-		pcibios_resource_to_bus(dev, &region, &io_res);
-		pos = region.start;
-		i = pos / phb->ioda.io_segsize;
-		while(i < phb->ioda.total_pe && pos <= region.end) {
-			if (phb->ioda.io_segmap[i]) {
-				pr_err("%s: Trying to use IO seg #%d which is"
-				       " already used by PE# %d\n",
-				       pci_name(dev), i,
-				       phb->ioda.io_segmap[i]);
-				/* XXX DO SOMETHING TO DISABLE DEVICE ? */
-				break;
-			}
-			phb->ioda.io_segmap[i] = pe;
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
-							 OPAL_IO_WINDOW_TYPE,
-							 0, i);
-			if (rc != OPAL_SUCCESS) {
-				pr_err("%s: OPAL error %d setting up mapping"
-				       " for IO seg# %d\n",
-				       pci_name(dev), rc, i);
-				/* XXX DO SOMETHING TO DISABLE DEVICE ? */
-				break;
-			}
-			pos += phb->ioda.io_segsize;
-			i++;
-		};
-	}
-
-	/* Setup M32 segments */
-	if (m32_res.start < m32_res.end) {
-		pcibios_resource_to_bus(dev, &region, &m32_res);
-		pos = region.start;
-		i = pos / phb->ioda.m32_segsize;
-		while(i < phb->ioda.total_pe && pos <= region.end) {
-			if (phb->ioda.m32_segmap[i]) {
-				pr_err("%s: Trying to use M32 seg #%d which is"
-				       " already used by PE# %d\n",
-				       pci_name(dev), i,
-				       phb->ioda.m32_segmap[i]);
-				/* XXX DO SOMETHING TO DISABLE DEVICE ? */
-				break;
-			}
-			phb->ioda.m32_segmap[i] = pe;
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
-							 OPAL_M32_WINDOW_TYPE,
-							 0, i);
-			if (rc != OPAL_SUCCESS) {
-				pr_err("%s: OPAL error %d setting up mapping"
-				       " for M32 seg# %d\n",
-				       pci_name(dev), rc, i);
-				/* XXX DO SOMETHING TO DISABLE DEVICE ? */
-				break;
-			}
-			pos += phb->ioda.m32_segsize;
-			i++;
-		}
-	}
-}
-
-/* Check if a resource still fits in the total IO or M32 range
- * for a given PHB
- */
-static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose,
-					   struct resource *r)
-{
-	struct resource *bounds;
-
-	if (r->flags & IORESOURCE_IO)
-		bounds = &hose->io_resource;
-	else if (r->flags & IORESOURCE_MEM)
-		bounds = &hose->mem_resources[0];
-	else
-		return 1;
-
-	if (r->start >= bounds->start && r->end <= bounds->end)
-		return 1;
-	r->flags = 0;
-	return 0;
-}
-
-static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
-{
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pci_bus *cbus;
-	struct pci_dev *cdev;
-	unsigned int i;
-
-	/* We used to clear all device enables here. However it looks like
-	 * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
-	 * and shoot fatal errors to the PHB which in turns fences itself
-	 * and we can't recover from that ... yet. So for now, let's leave
-	 * the enables as-is and hope for the best.
-	 */
-
-	/* Check if bus resources fit in our IO or M32 range */
-	for (i = 0; bus->self && (i < 2); i++) {
-		struct resource *r = bus->resource[i];
-		if (r && !pnv_ioda_resource_fit(hose, r))
-			pr_err("%s: Bus %d resource %d disabled, no room\n",
-			       pci_name(bus->self), bus->number, i);
-	}
-
-	/* Update self if it's not a PHB */
-	if (bus->self)
-		pci_setup_bridge(bus);
-
-	/* Update child devices */
-	list_for_each_entry(cdev, &bus->devices, bus_list) {
-		/* Check if resource fits, if not, disabled it */
-		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-			struct resource *r = &cdev->resource[i];
-			if (!pnv_ioda_resource_fit(hose, r))
-				pr_err("%s: Resource %d disabled, no room\n",
-				       pci_name(cdev), i);
-		}
-
-		/* Assign segments */
-		pnv_ioda_setup_pe_segments(cdev);
-
-		/* Update HW BARs */
-		for (i = 0; i <= PCI_ROM_RESOURCE; i++)
-			pci_update_resource(cdev, i);
-	}
-
-	/* Update child busses */
-	list_for_each_entry(cbus, &bus->children, node)
-		pnv_ioda_update_resources(cbus);
-}
-
 static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	unsigned long pe;
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 1/8] ppc/pnv: create bus sensitive PEs
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

Basically, there're 2 types of PCI bus sensitive PEs: (A) The PE
includes single PCI bus. (B) The PE includes the PCI bus and all
the subordinate PCI buses. At present, we'd like to put PCI bus
originated by PCI-e link to form PE that contains single PCI bus,
and the PCIe-to-PCI bridge will form the 2nd type of PE. We don't
figure out to detect PLX bridge yet. Once we can detect PLX bridge
some day, we have to put PCI buses originated from the downstream
port of PLX bridge to the 2nd type of PE.

The patch changes the original implementation for a little bit
to support 2 types of PCI bus sensitive PEs described as above.
Also, the function used to retrieve the corresponding PE according
to the given PCI device has been changed based on that because each
PCI device should trace the directly associated PE.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   96 +++++++++++++++++------------
 arch/powerpc/platforms/powernv/pci.h      |   10 ++--
 2 files changed, 62 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index af6b7fb..138de18 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -548,7 +548,7 @@ static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
  * but in the meantime, we need to protect them to avoid warnings
  */
 #ifdef CONFIG_PCI_MSI
-static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
+static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
 	struct pnv_phb *phb = hose->private_data;
@@ -560,19 +560,6 @@ static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
 		return NULL;
 	return &phb->ioda.pe_array[pdn->pe_number];
 }
-
-static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
-{
-	struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);
-
-	while (!pe && dev->bus->self) {
-		dev = dev->bus->self;
-		pe = __pnv_ioda_get_one_pe(dev);
-		if (pe)
-			pe = pe->bus_pe;
-	}
-	return pe;
-}
 #endif /* CONFIG_PCI_MSI */
 
 static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
@@ -589,7 +576,11 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
 		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
 		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
 		parent = pe->pbus->self;
-		count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+		if (pe->flags & PNV_IODA_PE_BUS_ALL)
+			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+		else
+			count = 1;
+
 		switch(count) {
 		case  1: bcomp = OpalPciBusAll;		break;
 		case  2: bcomp = OpalPciBus7Bits;	break;
@@ -699,6 +690,7 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
 	return 10;
 }
 
+#if 0
 static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -767,6 +759,7 @@ static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
 	return pe;
 }
+#endif /* Useful for SRIOV case */
 
 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 {
@@ -784,34 +777,33 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 		pdn->pcidev = dev;
 		pdn->pe_number = pe->pe_number;
 		pe->dma_weight += pnv_ioda_dma_weight(dev);
-		if (dev->subordinate)
+		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
 			pnv_ioda_setup_same_PE(dev->subordinate, pe);
 	}
 }
 
-static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
-					    struct pnv_ioda_pe *ppe)
+/*
+ * There're 2 types of PCI bus sensitive PEs: One that is compromised of
+ * single PCI bus. Another one that contains the primary PCI bus and its
+ * subordinate PCI devices and buses. The second type of PE is normally
+ * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
+ */
+static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
-	struct pci_bus *bus = dev->subordinate;
 	struct pnv_ioda_pe *pe;
 	int pe_num;
 
-	if (!bus) {
-		pr_warning("%s: Bridge without a subordinate bus !\n",
-			   pci_name(dev));
-		return;
-	}
 	pe_num = pnv_ioda_alloc_pe(phb);
 	if (pe_num == IODA_INVALID_PE) {
-		pr_warning("%s: Not enough PE# available, disabling bus\n",
-			   pci_name(dev));
+		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+			__func__, pci_domain_nr(bus), bus->number);
 		return;
 	}
 
 	pe = &phb->ioda.pe_array[pe_num];
-	ppe->bus_pe = pe;
+	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
 	pe->pbus = bus;
 	pe->pdev = NULL;
 	pe->tce32_seg = -1;
@@ -819,8 +811,12 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
 	pe->rid = bus->busn_res.start << 8;
 	pe->dma_weight = 0;
 
-	pe_info(pe, "Secondary busses %pR associated with PE\n",
-		&bus->busn_res);
+	if (all)
+		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
+			bus->busn_res.start, bus->busn_res.end, pe_num);
+	else
+		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
+			bus->busn_res.start, pe_num);
 
 	if (pnv_ioda_configure_pe(phb, pe)) {
 		/* XXX What do we do here ? */
@@ -848,17 +844,33 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
 static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
-	struct pnv_ioda_pe *pe;
+
+	pnv_ioda_setup_bus_PE(bus, 0);
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
-		pe = pnv_ioda_setup_dev_PE(dev);
-		if (pe == NULL)
-			continue;
-		/* Leaving the PCIe domain ... single PE# */
-		if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
-			pnv_ioda_setup_bus_PE(dev, pe);
-		else if (dev->subordinate)
-			pnv_ioda_setup_PEs(dev->subordinate);
+		if (dev->subordinate) {
+			if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
+			else
+				pnv_ioda_setup_PEs(dev->subordinate);
+		}
+	}
+}
+
+/*
+ * Configure PEs so that the downstream PCI buses and devices
+ * could have their associated PE#. Unfortunately, we didn't
+ * figure out the way to identify the PLX bridge yet. So we
+ * simply put the PCI bus and the subordinate behind the root
+ * port to PE# here. The game rule here is expected to be changed
+ * as soon as we can detected PLX bridge correctly.
+ */
+static void __devinit pnv_pci_ioda_setup_PEs(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		pnv_ioda_setup_PEs(hose->bus);
 	}
 }
 
@@ -1139,6 +1151,11 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
 	}
 }
 
+static void __devinit pnv_pci_ioda_fixup(void)
+{
+	pnv_pci_ioda_setup_PEs();
+}
+
 /*
  * Returns the alignment for I/O or memory windows for p2p
  * bridges. That actually depends on how PEs are segmented.
@@ -1342,6 +1359,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	 * ourselves here
 	 */
 	ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
+	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
 	pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8bc4796..0cb760c 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -17,9 +17,14 @@ enum pnv_phb_model {
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	4096
+#define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
+#define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
+#define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_ioda_pe {
+	unsigned long		flags;
+
 	/* A PE can be associated with a single device or an
 	 * entire bus (& children). In the former case, pdev
 	 * is populated, in the later case, pbus is.
@@ -40,11 +45,6 @@ struct pnv_ioda_pe {
 	 */
 	unsigned int		dma_weight;
 
-	/* This is a PCI-E -> PCI-X bridge, this points to the
-	 * corresponding bus PE
-	 */
-	struct pnv_ioda_pe	*bus_pe;
-
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	int			tce32_seg;
 	int			tce32_segcount;
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 3/8] ppc/pnv: I/O and MMIO resource assignment for PEs
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

There're 2 types of PCI bus sensitive PEs: (A) The PE includes
single PCI bus. (B) The PE includes the PCI bus and all the subordinate
PCI buses, and the patch tries to assign I/O and MMIO resources
based on created PEs. Fortunately, we figured out unified scheme
to do resource assignment for all types of PCI bus based PEs according
to Ben's idea:

        - Resource assignment based on PE from top to bottom.
        - The soureces, either I/O or MMIO, of the PE are figured out
          from the assigned PCI bus.
        - The occupied resource by parent PE could possibilly be overrided
          by children PEs.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   89 +++++++++++++++++++++++++++++
 1 files changed, 89 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 61b0571..96cdc61 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1154,9 +1154,98 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
 	}
 }
 
+/*
+ * This function is supposed to be called on basis of PE from top
+ * to bottom style. So the the I/O or MMIO segment assigned to
+ * parent PE could be overrided by its child PEs if necessary.
+ */
+static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose,
+				struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_bus_region region;
+	struct resource *res;
+	int i, index;
+	int rc;
+
+	/*
+	 * NOTE: We only care PCI bus based PE for now. For PCI
+	 * device based PE, for example SRIOV sensitive VF should
+	 * be figured out later.
+	 */
+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+	pci_bus_for_each_resource(pe->pbus, res, i) {
+		if (!res || !res->flags ||
+		    res->start > res->end)
+			continue;
+
+		if (res->flags & IORESOURCE_IO) {
+			region.start = res->start - phb->ioda.io_pci_base;
+			region.end   = res->end - phb->ioda.io_pci_base;
+			index = region.start / phb->ioda.io_segsize;
+
+			while (index < phb->ioda.total_pe &&
+			       region.start <= region.end) {
+				phb->ioda.io_segmap[index] = pe->pe_number;
+				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+				if (rc != OPAL_SUCCESS) {
+					pr_err("%s: OPAL error %d when mapping IO "
+					       "segment #%d to PE#%d\n",
+					       __func__, rc, index, pe->pe_number);
+					break;
+				}
+
+				region.start += phb->ioda.io_segsize;
+				index++;
+			}
+		} else if (res->flags & IORESOURCE_MEM) {
+			region.start = res->start -
+				       hose->pci_mem_offset -
+				       phb->ioda.m32_pci_base;
+			region.end   = res->end -
+				       hose->pci_mem_offset -
+				       phb->ioda.m32_pci_base;
+			index = region.start / phb->ioda.m32_segsize;
+
+			while (index < phb->ioda.total_pe &&
+			       region.start <= region.end) {
+				phb->ioda.m32_segmap[index] = pe->pe_number;
+				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+				if (rc != OPAL_SUCCESS) {
+					pr_err("%s: OPAL error %d when mapping M32 "
+					       "segment#%d to PE#%d",
+					       __func__, rc, index, pe->pe_number);
+					break;
+				}
+
+				region.start += phb->ioda.m32_segsize;
+				index++;
+			}
+		}
+	}
+}
+
+static void __devinit pnv_pci_ioda_setup_seg(void)
+{
+	struct pci_controller *tmp, *hose;
+	struct pnv_phb *phb;
+	struct pnv_ioda_pe *pe;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+			pnv_ioda_setup_pe_seg(hose, pe);
+		}
+	}
+}
+
 static void __devinit pnv_pci_ioda_fixup(void)
 {
 	pnv_pci_ioda_setup_PEs();
+	pnv_pci_ioda_setup_seg();
 }
 
 /*
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 5/8] ppc/pnv: skip check on PE if necessary
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

While the device driver or PCI core tries to enable PCI device, the
platform dependent callback "ppc_md.pcibios_enable_device_hook" will
be called to check if there has one associated PE for the PCI device.
If we don't have the associated PE for the PCI device, it's not allowed
to enable the PCI device. Unfortunately, there might have some cases
we have to enable the PCI device (e.g. P2P bridge), but the PEs have
not been created yet.

The patch handles the unfortunate cases. Each PHB (struct pnv_phb)
has one field "initialized" to trace if the PEs have been created
and configured or not. When the PEs are not available, we won't check
the associated PE for the PCI device to be enabled.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   19 ++++++++++++++++++-
 arch/powerpc/platforms/powernv/pci.h      |    1 +
 2 files changed, 19 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 09deab2..adaef11 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1245,9 +1245,14 @@ static void __devinit pnv_pci_ioda_setup_seg(void)
 static void __devinit pnv_pci_ioda_setup_DMA(void)
 {
 	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
 
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
 		pnv_ioda_setup_dma(hose->private_data);
+
+		/* Mark the PHB initialization done */
+		phb = hose->private_data;
+		phb->initialized = 1;
 	}
 }
 
@@ -1300,10 +1305,22 @@ out:
  */
 static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
 {
-	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dn *pdn;
 
+	/* The function is probably called while the PEs have
+	 * not be created yet. For example, resource reassignment
+	 * during PCI probe period. We just skip the check if
+	 * PEs isn't ready.
+	 */
+	if (!phb->initialized)
+		return 0;
+
+	pdn = pnv_ioda_get_pdn(dev);
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
 		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b70720b..7cfb7c8 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -69,6 +69,7 @@ struct pnv_phb {
 	enum pnv_phb_model	model;
 	u64			opal_id;
 	void __iomem		*regs;
+	int			initialized;
 	spinlock_t		lock;
 
 #ifdef CONFIG_PCI_MSI
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 7/8] ppc/pnv: using PCI core to do resource assignment
From: Gavin Shan @ 2012-08-20 13:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: weiyang, Gavin Shan
In-Reply-To: <1345470561-17785-1-git-send-email-shangw@linux.vnet.ibm.com>

Currently, the PCI probe flags "PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC"
used on powernv platform. That means the platform has to do the PCI
resource assignment by itself.

The patch changes the PCI probe flag to "PCI_REASSIGN_ALL_RSRC" so
that the PCI core will do the resource assignment. Also, the I/O
and MMIO minimal alignment for P2P bridges have been configured
while doing fixup for the PHBs.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   43 +++++------------------------
 1 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c856f90..ead4eff 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1124,36 +1124,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
 #endif /* CONFIG_PCI_MSI */
 
-/* This is the starting point of our IODA specific resource
- * allocation process
- */
-static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
-{
-	resource_size_t size, align;
-	struct pci_bus *child;
-
-	/* Associate PEs per functions */
-	pnv_ioda_setup_PEs(hose->bus);
-
-	/* Calculate all resources */
-	pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align);
-	pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align);
-
-	/* Apply then to HW */
-	pnv_ioda_update_resources(hose->bus);
-
-	/* Setup DMA */
-	pnv_ioda_setup_dma(hose->private_data);
-
-	/* Configure PCI Express settings */
-	list_for_each_entry(child, &hose->bus->children, node) {
-		struct pci_dev *self = child->self;
-		if (!self)
-			continue;
-		pcie_bus_configure_settings(child, self->pcie_mpss);
-	}
-}
-
 /*
  * This function is supposed to be called on basis of PE from top
  * to bottom style. So the the I/O or MMIO segment assigned to
@@ -1473,16 +1443,17 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	/* Setup MSI support */
 	pnv_pci_init_ioda_msis(phb);
 
-	/* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an
-	 * odd combination which essentially means that we skip all resource
-	 * fixups and assignments in the generic code, and do it all
-	 * ourselves here
+	/*
+	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
+	 * to let the PCI core do resource assignment. It's supposed
+	 * that the PCI core will do correct I/O and MMIO alignment
+	 * for the P2P bridge bars so that each PCI bus (excluding
+	 * the child P2P bridges) can form individual PE.
 	 */
-	ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
-	pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC);
+	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */
 	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
-- 
1.7.5.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox