LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 07/10] powerpc/mpic: Don't open-code dcr_resource_start
From: Kyle Moffett @ 2011-10-31 21:10 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: devicetree-discuss, Milton Miller, Paul Mackerras, Kyle Moffett,
	Scott Wood
In-Reply-To: <1320095411-20667-1-git-send-email-Kyle.D.Moffett@boeing.com>

Use dcr_resource_start() instead of open-coding the OpenFirmware
"dcr-reg" property lookup when mapping DCR resources.  This makes the
code a bit easier to read.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
---
 arch/powerpc/sysdev/mpic.c |    7 ++-----
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 31a9ada..0342ab8 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -319,11 +319,8 @@ static void _mpic_map_dcr(struct mpic *mpic, struct device_node *node,
 			  struct mpic_reg_bank *rb,
 			  unsigned int offset, unsigned int size)
 {
-	const u32 *dbasep;
-
-	dbasep = of_get_property(node, "dcr-reg", NULL);
-
-	rb->dhost = dcr_map(node, *dbasep + offset, size);
+	phys_addr_t phys_addr = dcr_resource_start(node);
+	rb->dhost = dcr_map(mpic->node, phys_addr + offset, size);
 	BUG_ON(!DCR_MAP_OK(rb->dhost));
 }
 
-- 
1.7.2.5

^ permalink raw reply related

* [RFC PATCH 04/10] powerpc/mpic: Save computed phys_addr for board-specific code
From: Kyle Moffett @ 2011-10-31 21:10 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: devicetree-discuss, Milton Miller, Scott Wood, Paul Mackerras,
	Kyle Moffett, Olof Johansson, Thomas Gleixner
In-Reply-To: <1320095411-20667-1-git-send-email-Kyle.D.Moffett@boeing.com>

The MPIC code can already perform an automatic OF address translation
step as part of mpic_alloc(), but several boards need to use that base
address when they perform mpic_assign_isu().

The easiest solution is to save the computed physical address into the
"struct mpic" for later use by the board code.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
---
 arch/powerpc/include/asm/mpic.h                   |    3 +++
 arch/powerpc/platforms/embedded6xx/holly.c        |   15 +++------------
 arch/powerpc/platforms/embedded6xx/linkstation.c  |   14 ++++----------
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |   16 +++-------------
 arch/powerpc/platforms/embedded6xx/storcenter.c   |   16 +++-------------
 arch/powerpc/platforms/pasemi/setup.c             |    2 +-
 arch/powerpc/sysdev/mpic.c                        |   11 ++++++-----
 7 files changed, 23 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index df18989..49bab41 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -295,6 +295,9 @@ struct mpic
 	/* Register access method */
 	enum mpic_reg_type	reg_type;
 
+	/* The physical base address of the MPIC */
+	phys_addr_t paddr;
+
 	/* The various ioremap'ed bases */
 	struct mpic_reg_bank	gregs;
 	struct mpic_reg_bank	tmregs;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 487bda0..80b2e2a 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -147,7 +147,6 @@ static void __init holly_setup_arch(void)
 static void __init holly_init_IRQ(void)
 {
 	struct mpic *mpic;
-	phys_addr_t mpic_paddr = 0;
 	struct device_node *tsi_pic;
 #ifdef CONFIG_PCI
 	unsigned int cascade_pci_irq;
@@ -156,20 +155,12 @@ static void __init holly_init_IRQ(void)
 #endif
 
 	tsi_pic = of_find_node_by_type(NULL, "open-pic");
-	if (tsi_pic) {
-		unsigned int size;
-		const void *prop = of_get_property(tsi_pic, "reg", &size);
-		mpic_paddr = of_translate_address(tsi_pic, prop);
-	}
-
-	if (mpic_paddr == 0) {
+	if (!tsi_pic) {
 		printk(KERN_ERR "%s: No tsi108 PIC found !\n", __func__);
 		return;
 	}
 
-	pr_debug("%s: tsi108 pic phys_addr = 0x%x\n", __func__, (u32) mpic_paddr);
-
-	mpic = mpic_alloc(tsi_pic, mpic_paddr,
+	mpic = mpic_alloc(tsi_pic, 0,
 			MPIC_PRIMARY | MPIC_BIG_ENDIAN | MPIC_WANTS_RESET |
 			MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108,
 			24,
@@ -178,7 +169,7 @@ static void __init holly_init_IRQ(void)
 
 	BUG_ON(mpic == NULL);
 
-	mpic_assign_isu(mpic, 0, mpic_paddr + 0x100);
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x100);
 
 	mpic_init(mpic);
 
diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c
index 244f997..72b3685 100644
--- a/arch/powerpc/platforms/embedded6xx/linkstation.c
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -82,28 +82,22 @@ static void __init linkstation_init_IRQ(void)
 {
 	struct mpic *mpic;
 	struct device_node *dnp;
-	const u32 *prop;
-	int size;
-	phys_addr_t paddr;
 
 	dnp = of_find_node_by_type(NULL, "open-pic");
 	if (dnp == NULL)
 		return;
 
-	prop = of_get_property(dnp, "reg", &size);
-	paddr = (phys_addr_t)of_translate_address(dnp, prop);
-
-	mpic = mpic_alloc(dnp, paddr, MPIC_PRIMARY | MPIC_WANTS_RESET, 4, 32, " EPIC     ");
+	mpic = mpic_alloc(dnp, 0, MPIC_PRIMARY | MPIC_WANTS_RESET, 4, 32, " EPIC     ");
 	BUG_ON(mpic == NULL);
 
 	/* PCI IRQs */
-	mpic_assign_isu(mpic, 0, paddr + 0x10200);
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
 
 	/* I2C */
-	mpic_assign_isu(mpic, 1, paddr + 0x11000);
+	mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
 
 	/* ttyS0, ttyS1 */
-	mpic_assign_isu(mpic, 2, paddr + 0x11100);
+	mpic_assign_isu(mpic, 2, mpic->paddr + 0x11100);
 
 	mpic_init(mpic);
 }
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 1cb907c..28082f9 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -101,7 +101,6 @@ static void __init mpc7448_hpc2_setup_arch(void)
 static void __init mpc7448_hpc2_init_IRQ(void)
 {
 	struct mpic *mpic;
-	phys_addr_t mpic_paddr = 0;
 	struct device_node *tsi_pic;
 #ifdef CONFIG_PCI
 	unsigned int cascade_pci_irq;
@@ -110,21 +109,12 @@ static void __init mpc7448_hpc2_init_IRQ(void)
 #endif
 
 	tsi_pic = of_find_node_by_type(NULL, "open-pic");
-	if (tsi_pic) {
-		unsigned int size;
-		const void *prop = of_get_property(tsi_pic, "reg", &size);
-		mpic_paddr = of_translate_address(tsi_pic, prop);
-	}
-
-	if (mpic_paddr == 0) {
+	if (!tsi_pic) {
 		printk("%s: No tsi108 PIC found !\n", __func__);
 		return;
 	}
 
-	DBG("%s: tsi108 pic phys_addr = 0x%x\n", __func__,
-	    (u32) mpic_paddr);
-
-	mpic = mpic_alloc(tsi_pic, mpic_paddr,
+	mpic = mpic_alloc(tsi_pic, 0,
 			MPIC_PRIMARY | MPIC_BIG_ENDIAN | MPIC_WANTS_RESET |
 			MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108,
 			24,
@@ -133,7 +123,7 @@ static void __init mpc7448_hpc2_init_IRQ(void)
 
 	BUG_ON(mpic == NULL);
 
-	mpic_assign_isu(mpic, 0, mpic_paddr + 0x100);
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x100);
 
 	mpic_init(mpic);
 
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index 613070e..797870f 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -84,22 +84,12 @@ static void __init storcenter_init_IRQ(void)
 {
 	struct mpic *mpic;
 	struct device_node *dnp;
-	const void *prop;
-	int size;
-	phys_addr_t paddr;
 
 	dnp = of_find_node_by_type(NULL, "open-pic");
 	if (dnp == NULL)
 		return;
 
-	prop = of_get_property(dnp, "reg", &size);
-	if (prop == NULL) {
-		of_node_put(dnp);
-		return;
-	}
-
-	paddr = (phys_addr_t)of_translate_address(dnp, prop);
-	mpic = mpic_alloc(dnp, paddr, MPIC_PRIMARY | MPIC_WANTS_RESET,
+	mpic = mpic_alloc(dnp, 0, MPIC_PRIMARY | MPIC_WANTS_RESET,
 			16, 32, " OpenPIC  ");
 
 	of_node_put(dnp);
@@ -110,8 +100,8 @@ static void __init storcenter_init_IRQ(void)
 	 * 16 Serial Interrupts followed by 16 Internal Interrupts.
 	 * I2C is the second internal, so it is at 17, 0x11020.
 	 */
-	mpic_assign_isu(mpic, 0, paddr + 0x10200);
-	mpic_assign_isu(mpic, 1, paddr + 0x11000);
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
+	mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
 
 	mpic_init(mpic);
 }
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 7c858e6..883757e 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -233,7 +233,7 @@ static __init void pas_init_IRQ(void)
 			  mpic_flags, 0, 0, "PASEMI-OPIC");
 	BUG_ON(!mpic);
 
-	mpic_assign_isu(mpic, 0, openpic_addr + 0x10000);
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10000);
 	mpic_init(mpic);
 	/* The NMI/MCK source needs to be prio 15 */
 	if (nmiprop) {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f7de33e..0ad7bf2 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1171,6 +1171,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 		return NULL;
 
 	mpic->name = name;
+	mpic->paddr = phys_addr;
 
 	mpic->hc_irq = mpic_irq_chip;
 	mpic->hc_irq.name = name;
@@ -1248,8 +1249,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 		mpic->reg_type = mpic_access_mmio_le;
 
 	/* Map the global registers */
-	mpic_map(mpic, node, phys_addr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
-	mpic_map(mpic, node, phys_addr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
+	mpic_map(mpic, node, mpic->paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
+	mpic_map(mpic, node, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
 
 	/* Reset */
 
@@ -1298,7 +1299,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 
 	/* Map the per-CPU registers */
 	for (i = 0; i < mpic->num_cpus; i++) {
-		mpic_map(mpic, node, phys_addr, &mpic->cpuregs[i],
+		mpic_map(mpic, node, mpic->paddr, &mpic->cpuregs[i],
 			 MPIC_INFO(CPU_BASE) + i * MPIC_INFO(CPU_STRIDE),
 			 0x1000);
 	}
@@ -1306,7 +1307,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	/* Initialize main ISU if none provided */
 	if (mpic->isu_size == 0) {
 		mpic->isu_size = mpic->num_sources;
-		mpic_map(mpic, node, phys_addr, &mpic->isus[0],
+		mpic_map(mpic, node, mpic->paddr, &mpic->isus[0],
 			 MPIC_INFO(IRQ_BASE), MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
 	}
 	mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
@@ -1338,7 +1339,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	}
 	printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %llx,"
 	       " max %d CPUs\n",
-	       name, vers, (unsigned long long)phys_addr, mpic->num_cpus);
+	       name, vers, (unsigned long long)mpic->paddr, mpic->num_cpus);
 	printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n",
 	       mpic->isu_size, mpic->isu_shift, mpic->isu_mask);
 
-- 
1.7.2.5

^ permalink raw reply related

* [RFC PATCH 03/10] powerpc/mpic: Assume a device-node was passed in mpic_alloc()
From: Kyle Moffett @ 2011-10-31 21:10 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: devicetree-discuss, Milton Miller, Paul Mackerras, Kyle Moffett,
	Scott Wood
In-Reply-To: <1320095411-20667-1-git-send-email-Kyle.D.Moffett@boeing.com>

All of the existing callers of mpic_alloc() pass in a non-NULL
device-node pointer, so the checks for a NULL device-node may be
removed.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
---
 arch/powerpc/sysdev/mpic.c |   50 ++++++++++++++++++-------------------------
 1 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index d6ef4d9..f7de33e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1139,19 +1139,17 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 				unsigned int irq_count,
 				const char *name)
 {
-	struct mpic	*mpic;
-	u32		greg_feature;
-	const char	*vers;
-	int		i;
-	int		intvec_top;
+	int i, psize, intvec_top;
+	struct mpic *mpic;
+	u32 greg_feature;
+	const char *vers;
+	const u32 *psrc;
 
-	/*
-	 * If no phyiscal address was specified then all of the phyiscal
-	 * addressing parameters must come from the device-tree.
-	 */
-	if (!phys_addr) {
-		BUG_ON(!node);
+	/* This code assumes that a non-NULL device node is passed in */
+	BUG_ON(!node);
 
+	/* Pick the physical address from the device tree if unspecified */
+	if (!phys_addr) {
 		/* Check if it is DCR-based */
 		if (of_get_property(node, "dcr-reg")) {
 			flags |= MPIC_USES_DCR;
@@ -1218,28 +1216,22 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	mpic->spurious_vec  = intvec_top;
 
 	/* Check for "big-endian" in device-tree */
-	if (node && of_get_property(node, "big-endian", NULL) != NULL)
+	if (of_get_property(node, "big-endian", NULL) != NULL)
 		mpic->flags |= MPIC_BIG_ENDIAN;
-	if (node && of_device_is_compatible(node, "fsl,mpic"))
+	if (of_device_is_compatible(node, "fsl,mpic"))
 		mpic->flags |= MPIC_FSL;
 
 	/* Look for protected sources */
-	if (node) {
-		int psize;
-		unsigned int bits, mapsize;
-		const u32 *psrc =
-			of_get_property(node, "protected-sources", &psize);
-		if (psrc) {
-			psize /= 4;
-			bits = intvec_top + 1;
-			mapsize = BITS_TO_LONGS(bits) * sizeof(unsigned long);
-			mpic->protected = kzalloc(mapsize, GFP_KERNEL);
-			BUG_ON(mpic->protected == NULL);
-			for (i = 0; i < psize; i++) {
-				if (psrc[i] > intvec_top)
-					continue;
-				__set_bit(psrc[i], mpic->protected);
-			}
+	psrc = of_get_property(node, "protected-sources", &psize);
+	if (psrc) {
+		/* Allocate a bitmap with one bit per interrupt */
+		unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1);
+		mpic->protected = kzalloc(mapsize*sizeof(long), GFP_KERNEL);
+		BUG_ON(mpic->protected == NULL);
+		for (i = 0; i < psize/sizeof(u32); i++) {
+			if (psrc[i] > intvec_top)
+				continue;
+			__set_bit(psrc[i], mpic->protected);
 		}
 	}
 
-- 
1.7.2.5

^ permalink raw reply related

* [RFC PATCH 02/10] powerpc: Consolidate mpic_alloc() OF address translation
From: Kyle Moffett @ 2011-10-31 21:10 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: Lennert Buytenhek, devicetree-discuss, Milton Miller,
	Paul Mackerras, Kyle Moffett, Scott Wood, Thomas Gleixner
In-Reply-To: <1320095411-20667-1-git-send-email-Kyle.D.Moffett@boeing.com>

Instead of using the open-coded "reg" property lookup and address
translation in mpic_alloc(), directly call of_address_to_resource().
This handles various special cases with workarounds that the naive
of_translate_address() does not.

Afterwards it is possible to remove the copiously copy-pasted calls to
of_address_to_resource() from the 85xx/86xx/powermac platforms.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/85xx/corenet_ds.c  |    9 +----
 arch/powerpc/platforms/85xx/ksi8560.c     |    9 +----
 arch/powerpc/platforms/85xx/mpc8536_ds.c  |    9 +----
 arch/powerpc/platforms/85xx/mpc85xx_ads.c |    9 +----
 arch/powerpc/platforms/85xx/mpc85xx_cds.c |    9 +----
 arch/powerpc/platforms/85xx/mpc85xx_ds.c  |   11 +----
 arch/powerpc/platforms/85xx/mpc85xx_mds.c |    9 +----
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c |   11 +----
 arch/powerpc/platforms/85xx/p1010rdb.c    |    9 +----
 arch/powerpc/platforms/85xx/p1022_ds.c    |    9 +----
 arch/powerpc/platforms/85xx/p1023_rds.c   |    9 +----
 arch/powerpc/platforms/85xx/sbc8548.c     |    9 +----
 arch/powerpc/platforms/85xx/sbc8560.c     |    9 +----
 arch/powerpc/platforms/85xx/socrates.c    |    9 +----
 arch/powerpc/platforms/85xx/stx_gp3.c     |    9 +----
 arch/powerpc/platforms/85xx/tqm85xx.c     |    9 +----
 arch/powerpc/platforms/85xx/xes_mpc85xx.c |    9 +----
 arch/powerpc/platforms/86xx/pic.c         |    4 +-
 arch/powerpc/platforms/powermac/pic.c     |    7 +---
 arch/powerpc/sysdev/mpic.c                |   63 +++++++++++++++--------------
 20 files changed, 54 insertions(+), 177 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index 435074d..7893ad3 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -36,7 +36,6 @@
 void __init corenet_ds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 	unsigned int flags = MPIC_PRIMARY | MPIC_BIG_ENDIAN |
 				MPIC_BROKEN_FRR_NIRQS | MPIC_SINGLE_DEST_CPU;
@@ -48,16 +47,10 @@ void __init corenet_ds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
 	if (ppc_md.get_irq == mpic_get_coreint_irq)
 		flags |= MPIC_ENABLE_COREINT;
 
-	mpic = mpic_alloc(np, r.start, flags, 0, 256, " OpenPIC  ");
+	mpic = mpic_alloc(np, 0, flags, 0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
 
 	mpic_init(mpic);
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index c46f935..b20c07d 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -68,7 +68,6 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
 static void __init ksi8560_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 #ifdef CONFIG_CPM2
 	int irq;
@@ -81,13 +80,7 @@ static void __init ksi8560_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index f79f2f1..03173ba 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -35,7 +35,6 @@
 void __init mpc8536_ds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -44,13 +43,7 @@ void __init mpc8536_ds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			  MPIC_PRIMARY | MPIC_WANTS_RESET |
 			  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS,
 			0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 3b2c9bb..5cb797b 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -64,7 +64,6 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
 static void __init mpc85xx_ads_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 #ifdef CONFIG_CPM2
 	int irq;
@@ -76,13 +75,7 @@ static void __init mpc85xx_ads_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 2bf9978..69c1d0a 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -187,7 +187,6 @@ static struct irqaction mpc85xxcds_8259_irqaction = {
 static void __init mpc85xx_cds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 
 	np = of_find_node_by_type(np, "open-pic");
@@ -197,13 +196,7 @@ static void __init mpc85xx_cds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 52d2a3e..b608da7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -61,7 +61,6 @@ static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
 void __init mpc85xx_ds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 #ifdef CONFIG_PPC_I8259
 	struct device_node *cascade_node = NULL;
@@ -75,20 +74,14 @@ void __init mpc85xx_ds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
 	if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) {
-		mpic = mpic_alloc(np, r.start,
+		mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY |
 			MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 			MPIC_SINGLE_DEST_CPU,
 			0, 256, " OpenPIC  ");
 	} else {
-		mpic = mpic_alloc(np, r.start,
+		mpic = mpic_alloc(np, 0,
 			  MPIC_PRIMARY | MPIC_WANTS_RESET |
 			  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 			  MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 074be05..982f1a7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -475,20 +475,13 @@ machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier);
 static void __init mpc85xx_mds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (!np)
 		return;
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN |
 			MPIC_BROKEN_FRR_NIRQS | MPIC_SINGLE_DEST_CPU,
 			0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index cd49898..67bd1d4 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -43,7 +43,6 @@
 void __init mpc85xx_rdb_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 	unsigned long root = of_get_flat_dt_root();
 
@@ -53,20 +52,14 @@ void __init mpc85xx_rdb_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
 	if (of_flat_dt_is_compatible(root, "fsl,MPC85XXRDB-CAMP")) {
-		mpic = mpic_alloc(np, r.start,
+		mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY |
 			MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 			MPIC_SINGLE_DEST_CPU,
 			0, 256, " OpenPIC  ");
 	} else {
-		mpic = mpic_alloc(np, r.start,
+		mpic = mpic_alloc(np, 0,
 		  MPIC_PRIMARY | MPIC_WANTS_RESET |
 		  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 		  MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index d7387fa..5ffca27 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -31,7 +31,6 @@
 void __init p1010_rdb_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -40,13 +39,7 @@ void __init p1010_rdb_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start, MPIC_PRIMARY | MPIC_WANTS_RESET |
+	mpic = mpic_alloc(np, NULL, MPIC_PRIMARY | MPIC_WANTS_RESET |
 	  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS | MPIC_SINGLE_DEST_CPU,
 	  0, 256, " OpenPIC  ");
 
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 7e90e24..d911aca 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -239,7 +239,6 @@ int p1022ds_set_sysfs_monitor_port(int val)
 void __init p1022_ds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -248,13 +247,7 @@ void __init p1022_ds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		pr_err("Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 		MPIC_PRIMARY | MPIC_WANTS_RESET |
 		MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 		MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c
index 5ab21f3..30a5adb 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rds.c
@@ -109,7 +109,6 @@ machine_device_initcall(p1023_rds, p1023_publish_devices);
 static void __init mpc85xx_rds_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -118,13 +117,7 @@ static void __init mpc85xx_rds_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, NULL,
 		MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN |
 		MPIC_BROKEN_FRR_NIRQS | MPIC_SINGLE_DEST_CPU,
 		0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index d07dcb7..daced7d 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -54,7 +54,6 @@ static int sbc_rev;
 static void __init sbc8548_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 
 	np = of_find_node_by_type(np, "open-pic");
@@ -64,13 +63,7 @@ static void __init sbc8548_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c
index 09ced72..bd8b6c9 100644
--- a/arch/powerpc/platforms/85xx/sbc8560.c
+++ b/arch/powerpc/platforms/85xx/sbc8560.c
@@ -55,7 +55,6 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
 static void __init sbc8560_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np = NULL;
 #ifdef CONFIG_CPM2
 	int irq;
@@ -67,13 +66,7 @@ static void __init sbc8560_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index 747d8fb..fb4bfd6 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -46,7 +46,6 @@
 static void __init socrates_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -55,13 +54,7 @@ static void __init socrates_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 5387e9f..78aef45 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -59,7 +59,6 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
 static void __init stx_gp3_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 #ifdef CONFIG_CPM2
 	int irq;
@@ -71,13 +70,7 @@ static void __init stx_gp3_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 325de77..5775f4c 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -57,7 +57,6 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
 static void __init tqm85xx_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 #ifdef CONFIG_CPM2
 	int irq;
@@ -69,13 +68,7 @@ static void __init tqm85xx_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Could not map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index ce3f660..fccf9aa 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -42,7 +42,6 @@
 void __init xes_mpc85xx_pic_init(void)
 {
 	struct mpic *mpic;
-	struct resource r;
 	struct device_node *np;
 
 	np = of_find_node_by_type(NULL, "open-pic");
@@ -51,13 +50,7 @@ void __init xes_mpc85xx_pic_init(void)
 		return;
 	}
 
-	if (of_address_to_resource(np, 0, &r)) {
-		printk(KERN_ERR "Failed to map mpic register space\n");
-		of_node_put(np);
-		return;
-	}
-
-	mpic = mpic_alloc(np, r.start,
+	mpic = mpic_alloc(np, 0,
 			  MPIC_PRIMARY | MPIC_WANTS_RESET |
 			  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS,
 			0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 8ef8960..f85c8f0 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -33,7 +33,6 @@ void __init mpc86xx_init_irq(void)
 {
 	struct mpic *mpic;
 	struct device_node *np;
-	struct resource res;
 #ifdef CONFIG_PPC_I8259
 	struct device_node *cascade_node = NULL;
 	int cascade_irq;
@@ -43,9 +42,8 @@ void __init mpc86xx_init_irq(void)
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (np == NULL)
 		return;
-	of_address_to_resource(np, 0, &res);
 
-	mpic = mpic_alloc(np, res.start,
+	mpic = mpic_alloc(np, 0,
 			MPIC_PRIMARY | MPIC_WANTS_RESET |
 			MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 			MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 7667db4..fe360f7c 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -500,15 +500,10 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
 						int master)
 {
 	const char *name = master ? " MPIC 1   " : " MPIC 2   ";
-	struct resource r;
 	struct mpic *mpic;
 	unsigned int flags = master ? MPIC_PRIMARY : 0;
 	int rc;
 
-	rc = of_address_to_resource(np, 0, &r);
-	if (rc)
-		return NULL;
-
 	pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0);
 
 	flags |= MPIC_WANTS_RESET;
@@ -521,7 +516,7 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
 	if (master && (flags & MPIC_BIG_ENDIAN))
 		flags |= MPIC_U3_HT_IRQS;
 
-	mpic = mpic_alloc(np, r.start, flags, 0, 0, name);
+	mpic = mpic_alloc(np, 0, flags, 0, 0, name);
 	if (mpic == NULL)
 		return NULL;
 
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index d5d3ff3..d6ef4d9 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1144,7 +1144,29 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	const char	*vers;
 	int		i;
 	int		intvec_top;
-	u64		paddr = phys_addr;
+
+	/*
+	 * If no physical address was specified then all of the physical
+	 * addressing parameters must come from the device-tree.
+	 */
+	if (!phys_addr) {
+		BUG_ON(!node);
+
+		/* Check if it is DCR-based */
+		if (of_get_property(node, "dcr-reg")) {
+			flags |= MPIC_USES_DCR;
+		} else {
+			struct resource r;
+			if (of_address_to_resource(node, 0, &r))
+				return NULL;
+			phys_addr = r.start;
+		}
+	}
+
+#ifndef CONFIG_PPC_DCR
+	/* If DCR support is not enabled then MPICs cannot use it */
+	BUG_ON(flags & MPIC_USES_DCR);
+#endif
 
 	mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
 	if (mpic == NULL)
@@ -1226,35 +1248,16 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 #endif
 
 	/* default register type */
-	mpic->reg_type = (flags & MPIC_BIG_ENDIAN) ?
-		mpic_access_mmio_be : mpic_access_mmio_le;
-
-	/* If no physical address is passed in, a device-node is mandatory */
-	BUG_ON(paddr == 0 && node == NULL);
-
-	/* If no physical address passed in, check if it's dcr based */
-	if (paddr == 0 && of_get_property(node, "dcr-reg", NULL) != NULL) {
-#ifdef CONFIG_PPC_DCR
-		mpic->flags |= MPIC_USES_DCR;
+	if (flags & MPIC_USES_DCR)
 		mpic->reg_type = mpic_access_dcr;
-#else
-		BUG();
-#endif /* CONFIG_PPC_DCR */
-	}
-
-	/* If the MPIC is not DCR based, and no physical address was passed
-	 * in, try to obtain one
-	 */
-	if (paddr == 0 && !(mpic->flags & MPIC_USES_DCR)) {
-		const u32 *reg = of_get_property(node, "reg", NULL);
-		BUG_ON(reg == NULL);
-		paddr = of_translate_address(node, reg);
-		BUG_ON(paddr == OF_BAD_ADDR);
-	}
+	else if (flags & MPIC_BIG_ENDIAN)
+		mpic->reg_type = mpic_access_mmio_be;
+	else
+		mpic->reg_type = mpic_access_mmio_le;
 
 	/* Map the global registers */
-	mpic_map(mpic, node, paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
-	mpic_map(mpic, node, paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
+	mpic_map(mpic, node, phys_addr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
+	mpic_map(mpic, node, phys_addr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
 
 	/* Reset */
 
@@ -1303,7 +1306,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 
 	/* Map the per-CPU registers */
 	for (i = 0; i < mpic->num_cpus; i++) {
-		mpic_map(mpic, node, paddr, &mpic->cpuregs[i],
+		mpic_map(mpic, node, phys_addr, &mpic->cpuregs[i],
 			 MPIC_INFO(CPU_BASE) + i * MPIC_INFO(CPU_STRIDE),
 			 0x1000);
 	}
@@ -1311,7 +1314,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	/* Initialize main ISU if none provided */
 	if (mpic->isu_size == 0) {
 		mpic->isu_size = mpic->num_sources;
-		mpic_map(mpic, node, paddr, &mpic->isus[0],
+		mpic_map(mpic, node, phys_addr, &mpic->isus[0],
 			 MPIC_INFO(IRQ_BASE), MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
 	}
 	mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
@@ -1343,7 +1346,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	}
 	printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %llx,"
 	       " max %d CPUs\n",
-	       name, vers, (unsigned long long)paddr, mpic->num_cpus);
+	       name, vers, (unsigned long long)phys_addr, mpic->num_cpus);
 	printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n",
 	       mpic->isu_size, mpic->isu_shift, mpic->isu_mask);
 
-- 
1.7.2.5

^ permalink raw reply related

* Re: [4/4] powerpc/booke: Re-organize debug code
From: Jimi Xenidis @ 2011-10-31 18:37 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev
In-Reply-To: <137546DC-1F2C-4623-B96F-5F9FFB9498F9@kernel.crashing.org>


On Oct 31, 2011, at 9:21 AM, Kumar Gala wrote:

>=20
> On Oct 28, 2011, at 2:37 PM, Jimi Xenidis wrote:
>=20
>>=20
>> On Oct 5, 2011, at 9:53 PM, Kumar Gala wrote:
>>=20
>>> * set_dabr/do_dabr are no longer used when CONFIG_PPC_ADV_DEBUG_REGS is set
>>> refactor code a bit such that we only build the dabr code for
>>> !CONFIG_PPC_ADV_DEBUG_REGS and removed some CONFIG_PPC_ADV_DEBUG_REGS
>>> code in set_dabr that would never get built.
>>>=20
>>> * Move do_send_trap into traps.c as its only used there
>>>=20
>>> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
>>>=20
>>> ---
>>> arch/powerpc/include/asm/system.h |    5 +--
>>> arch/powerpc/kernel/process.c     |   97 =
+++++++++++++-----------------------
>>> arch/powerpc/kernel/traps.c       |   17 +++++++
>>> 3 files changed, 53 insertions(+), 66 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/include/asm/system.h =
b/arch/powerpc/include/asm/system.h
>>> index e30a13d..1dc5d9c 100644
>>> --- a/arch/powerpc/include/asm/system.h
>>> +++ b/arch/powerpc/include/asm/system.h
>>> @@ -111,11 +111,8 @@ static inline int debugger_dabr_match(struct =
pt_regs *regs) { return 0; }
>>> static inline int debugger_fault_handler(struct pt_regs *regs) { =
return 0; }
>>> #endif
>>>=20
>>> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
>>> extern int set_dabr(unsigned long dabr);
>>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> -extern void do_send_trap(struct pt_regs *regs, unsigned long =
address,
>>> -			 unsigned long error_code, int signal_code, int =
brkpt);
>>> -#else
>>=20
>>=20
>> This part of the patch breaks xmon.c
>> Naively I simply wrapped the xmon call:
>>=20
>> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
>> index f08836a..b5911b2 100644
>> --- a/arch/powerpc/xmon/xmon.c
>> +++ b/arch/powerpc/xmon/xmon.c
>> @@ -738,8 +738,10 @@ static void insert_bpts(void)
>>=20
>> static void insert_cpu_bpts(void)
>> {
>> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
>> 	if (dabr.enabled)
>> 		set_dabr(dabr.address | (dabr.enabled & 7));
>> +#endif
>> 	if (iabr && cpu_has_feature(CPU_FTR_IABR))
>> 		mtspr(SPRN_IABR, iabr->address
>> 			 | (iabr->enabled & (BP_IABR|BP_IABR_TE)));
>> @@ -767,7 +769,9 @@ static void remove_bpts(void)
>>=20
>> static void remove_cpu_bpts(void)
>> {
>> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
>> 	set_dabr(0);
>> +#endif
>> 	if (cpu_has_feature(CPU_FTR_IABR))
>> 		mtspr(SPRN_IABR, 0);
>> }
>=20
> Shouldn't all of these functions be #ifndef'd out as we don't support cpu_bpts on book-e parts in xmon code today?

Well I guess this is one for benh, because I would have expected xmon to test and call ppc_md.dabr.
Actually, should everyone be doing that?
-jx


>=20
>>=20
>> -JX
>>=20
>>=20
>>> extern void do_dabr(struct pt_regs *regs, unsigned long address,
>>> 		    unsigned long error_code);
>>> #endif
>>> diff --git a/arch/powerpc/kernel/process.c =
b/arch/powerpc/kernel/process.c
>>> index 269a309..989e574 100644
>>> --- a/arch/powerpc/kernel/process.c
>>> +++ b/arch/powerpc/kernel/process.c
>>> @@ -251,50 +251,6 @@ void discard_lazy_cpu_state(void)
>>> #endif /* CONFIG_SMP */
>>>=20
>>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> -void do_send_trap(struct pt_regs *regs, unsigned long address,
>>> -		  unsigned long error_code, int signal_code, int =
breakpt)
>>> -{
>>> -	siginfo_t info;
>>> -
>>> -	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>>> -			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>>> -		return;
>>> -
>>> -	/* Deliver the signal to userspace */
>>> -	info.si_signo =3D SIGTRAP;
>>> -	info.si_errno =3D breakpt;	/* breakpoint or watchpoint id =
*/
>>> -	info.si_code =3D signal_code;
>>> -	info.si_addr =3D (void __user *)address;
>>> -	force_sig_info(SIGTRAP, &info, current);
>>> -}
>>> -#else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
>>> -void do_dabr(struct pt_regs *regs, unsigned long address,
>>> -		    unsigned long error_code)
>>> -{
>>> -	siginfo_t info;
>>> -
>>> -	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>>> -			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>>> -		return;
>>> -
>>> -	if (debugger_dabr_match(regs))
>>> -		return;
>>> -
>>> -	/* Clear the DABR */
>>> -	set_dabr(0);
>>> -
>>> -	/* Deliver the signal to userspace */
>>> -	info.si_signo =3D SIGTRAP;
>>> -	info.si_errno =3D 0;
>>> -	info.si_code =3D TRAP_HWBKPT;
>>> -	info.si_addr =3D (void __user *)address;
>>> -	force_sig_info(SIGTRAP, &info, current);
>>> -}
>>> -#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>>> -
>>> -static DEFINE_PER_CPU(unsigned long, current_dabr);
>>> -
>>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> /*
>>> * Set the debug registers back to their default "safe" values.
>>> */
>>> @@ -357,16 +313,7 @@ static void switch_booke_debug_regs(struct =
thread_struct *new_thread)
>>> 			prime_debug_regs(new_thread);
>>> }
>>> #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
>>> -#ifndef CONFIG_HAVE_HW_BREAKPOINT
>>> -static void set_debug_reg_defaults(struct thread_struct *thread)
>>> -{
>>> -	if (thread->dabr) {
>>> -		thread->dabr =3D 0;
>>> -		set_dabr(0);
>>> -	}
>>> -}
>>> -#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
>>> -#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>>> +static DEFINE_PER_CPU(unsigned long, current_dabr);
>>>=20
>>> int set_dabr(unsigned long dabr)
>>> {
>>> @@ -376,19 +323,45 @@ int set_dabr(unsigned long dabr)
>>> 		return ppc_md.set_dabr(dabr);
>>>=20
>>> 	/* XXX should we have a CPU_FTR_HAS_DABR ? */
>>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> -	mtspr(SPRN_DAC1, dabr);
>>> -#ifdef CONFIG_PPC_47x
>>> -	isync();
>>> -#endif
>>> -#elif defined(CONFIG_PPC_BOOK3S)
>>> 	mtspr(SPRN_DABR, dabr);
>>> -#endif
>>> -
>>>=20
>>> 	return 0;
>>> }
>>>=20
>>> +void do_dabr(struct pt_regs *regs, unsigned long address,
>>> +		    unsigned long error_code)
>>> +{
>>> +	siginfo_t info;
>>> +
>>> +	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>>> +			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>>> +		return;
>>> +
>>> +	if (debugger_dabr_match(regs))
>>> +		return;
>>> +
>>> +	/* Clear the DABR */
>>> +	set_dabr(0);
>>> +
>>> +	/* Deliver the signal to userspace */
>>> +	info.si_signo =3D SIGTRAP;
>>> +	info.si_errno =3D 0;
>>> +	info.si_code =3D TRAP_HWBKPT;
>>> +	info.si_addr =3D (void __user *)address;
>>> +	force_sig_info(SIGTRAP, &info, current);
>>> +}
>>> +
>>> +#ifndef CONFIG_HAVE_HW_BREAKPOINT
>>> +static void set_debug_reg_defaults(struct thread_struct *thread)
>>> +{
>>> +	if (thread->dabr) {
>>> +		thread->dabr =3D 0;
>>> +		set_dabr(0);
>>> +	}
>>> +}
>>> +#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
>>> +#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>>> +
>>> #ifdef CONFIG_PPC64
>>> DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
>>> #endif
>>> diff --git a/arch/powerpc/kernel/traps.c =
b/arch/powerpc/kernel/traps.c
>>> index db733d3..edc1108 100644
>>> --- a/arch/powerpc/kernel/traps.c
>>> +++ b/arch/powerpc/kernel/traps.c
>>> @@ -1184,6 +1184,23 @@ void SoftwareEmulation(struct pt_regs *regs)
>>> #endif /* CONFIG_8xx */
>>>=20
>>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> +static void do_send_trap(struct pt_regs *regs, unsigned long =
address,
>>> +		  unsigned long error_code, int signal_code, int =
breakpt)
>>> +{
>>> +	siginfo_t info;
>>> +
>>> +	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>>> +			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>>> +		return;
>>> +
>>> +	/* Deliver the signal to userspace */
>>> +	info.si_signo =3D SIGTRAP;
>>> +	info.si_errno =3D breakpt;	/* breakpoint or watchpoint id =
*/
>>> +	info.si_code =3D signal_code;
>>> +	info.si_addr =3D (void __user *)address;
>>> +	force_sig_info(SIGTRAP, &info, current);
>>> +}
>>> +
>>> static void handle_debug(struct pt_regs *regs, unsigned long =
debug_status)
>>> {
>>> 	int changed =3D 0;
>=20

^ permalink raw reply

* Re: [1/4] powerpc: Revert show_regs() define for readability
From: Jimi Xenidis @ 2011-10-31 18:35 UTC (permalink / raw)
  To: Kumar Gala; +Cc: Linuxppc-dev list
In-Reply-To: <F8FBC967-70A0-47F4-9BB8-3E16CCEC6565@kernel.crashing.org>


On Oct 31, 2011, at 9:18 AM, Kumar Gala wrote:

>=20
> On Oct 28, 2011, at 2:40 PM, Jimi Xenidis wrote:
>=20
>>=20
>> On Oct 5, 2011, at 9:53 PM, Kumar Gala wrote:
>>=20
>>> We had an existing ifdef for 4xx & BOOKE processors that got changed to
>>> CONFIG_PPC_ADV_DEBUG_REGS.  The define has nothing to do with
>>> CONFIG_PPC_ADV_DEBUG_REGS.  The define really should be:
>>>=20
>>> #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>>>=20
>>> and not
>>>=20
>>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>>=20
>>> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
>>>=20
>>> ---
>>> arch/powerpc/kernel/process.c |    2 +-
>>> 1 files changed, 1 insertions(+), 1 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/kernel/process.c =
b/arch/powerpc/kernel/process.c
>>> index 8f53954..a1b5981 100644
>>> --- a/arch/powerpc/kernel/process.c
>>> +++ b/arch/powerpc/kernel/process.c
>>> @@ -657,7 +657,7 @@ void show_regs(struct pt_regs * regs)
>>> 	if ((regs->trap !=3D 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
>>> 		printk("CFAR: "REG"\n", regs->orig_gpr3);
>>> 	if (trap =3D=3D 0x300 || trap =3D=3D 0x600)
>>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>>> +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>>> 		printk("DEAR: "REG", ESR: "REG"\n", regs->dar, =
regs->dsisr);
>>=20
>> I'll be needing "|| defined(CONFIG_PPC_BOOK3E)" added to this please.
>> -jx
>=20
> Under what platform is CONFIG_PPC_BOOK3E set and CONFIG_BOOKE is not?

this was a grep typo on my part.  sorry.
-jx


>=20
> - k
>=20

^ permalink raw reply

* [RFC PATCH v3 10/10] fadump: Introduce config option for firmware assisted dump feature
From: Mahesh J Salgaonkar @ 2011-10-31 17:13 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch introduces a new config option CONFIG_FA_DUMP for firmware
assisted dump feature on Powerpc (ppc64) architecture.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6926b61..7ce773c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -379,6 +379,19 @@ config PHYP_DUMP
 
 	  If unsure, say "N"
 
+config FA_DUMP
+	bool "Firmware-assisted dump"
+	depends on PPC64 && PPC_RTAS && CRASH_DUMP
+	help
+	  A robust mechanism to get reliable kernel crash dump with
+	  assistance from firmware. This approach does not use kexec,
+	  instead firmware assists in booting the kdump kernel
+	  while preserving memory contents. Firmware-assisted dump
+	  is meant to be a kdump replacement offering robustness and
+	  speed not possible without system firmware assistance.
+
+	  If unsure, say "N"
+
 config PPCBUG_NVRAM
 	bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC
 	default y if PPC_PREP

^ permalink raw reply related

* [RFC PATCH v3 09/10] fadump: Invalidate the fadump registration during machine shutdown.
From: Mahesh J Salgaonkar @ 2011-10-31 17:12 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

If dump is active during system reboot, shutdown or halt then invalidate
the fadump registration as it does not get invalidated automatically.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/setup-common.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index ce35aaf..67e5caa 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -110,6 +110,14 @@ EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
 /* also used by kexec */
 void machine_shutdown(void)
 {
+#ifdef CONFIG_FA_DUMP
+	/*
+	 * if fadump is active, cleanup the fadump registration before we
+	 * shutdown.
+	 */
+	fadump_cleanup();
+#endif
+
 	if (ppc_md.machine_shutdown)
 		ppc_md.machine_shutdown();
 }

^ permalink raw reply related

* [RFC PATCH v3 08/10] fadump: Invalidate registration and release reserved memory for general use.
From: Mahesh J Salgaonkar @ 2011-10-31 17:11 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch introduces a sysfs interface '/sys/kernel/fadump_release_mem' to
invalidate the last fadump registration, invalidate '/proc/vmcore', release
the reserved memory for general use and re-register for future kernel dump.
Once the dump is copied to the disk, the userspace tool will echo 1 to
'/sys/kernel/fadump_release_mem'.

Release the reserved memory region excluding the size of the memory required
for future kernel dump registration.

Change in v3:
- Synchronize the fadump invalidation step to handle simultaneous writes to
  /sys/kernel/fadump_release_mem.

Change in v2:
- Introduced cpu_notes_buf_free() function to free memory allocated for
  cpu notes buffer.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |    3 +
 arch/powerpc/kernel/fadump.c      |  170 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 4a7d63e..2d983e8 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -196,6 +196,9 @@ extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
 extern void crash_fadump(struct pt_regs *, const char *);
+extern void fadump_cleanup(void);
+
+extern void vmcore_cleanup(void);
 #else	/* CONFIG_FA_DUMP */
 static inline int is_fadump_active(void) { return 0; }
 #endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 4d42fe5..ecdf81b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -33,6 +33,8 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/crash_dump.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 
 #include <asm/page.h>
 #include <asm/prom.h>
@@ -560,6 +562,19 @@ static void *cpu_notes_buf_alloc(unsigned long size)
 	return vaddr;
 }
 
+static void cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+{
+	struct page *page;
+	unsigned long order, count, i;
+
+	order = get_order(size);
+	count = 1 << order;
+	page = virt_to_page(vaddr);
+	for (i = 0; i < count; i++)
+		ClearPageReserved(page + i);
+	__free_pages(page, order);
+}
+
 /*
  * Read CPU state dump data and convert it into ELF notes.
  * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
@@ -944,6 +959,131 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
 	return 0;
 }
 
+static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+{
+	int rc = 0;
+	unsigned int wait_time;
+
+	pr_debug("Invalidating firmware-assisted dump registration\n");
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+			FADUMP_INVALIDATE, fdm,
+			sizeof(struct fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+	} while (wait_time);
+
+	if (rc) {
+		printk(KERN_ERR "Failed to invalidate firmware-assisted dump "
+			"rgistration. unexpected error(%d).\n", rc);
+		return rc;
+	}
+	fw_dump.dump_active = 0;
+	fdm_active = NULL;
+	return 0;
+}
+
+void fadump_cleanup(void)
+{
+	/* Invalidate the registration only if dump is active. */
+	if (fw_dump.dump_active) {
+		init_fadump_mem_struct(&fdm,
+			fdm_active->cpu_state_data.destination_address);
+		fadump_invalidate_dump(&fdm);
+	}
+}
+
+/*
+ * Release the memory that was reserved in early boot to preserve the memory
+ * contents. The released memory will be available for general use.
+ */
+static void fadump_release_memory(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+	unsigned long ra_start, ra_end;
+
+	ra_start = fw_dump.reserve_dump_area_start;
+	ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		/*
+		 * exclude the dump reserve area. Will reuse it for next
+		 * fadump registration.
+		 */
+		if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
+			continue;
+
+		ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+		init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+		free_page((unsigned long)__va(addr));
+		totalram_pages++;
+	}
+}
+
+static void fadump_invalidate_release_mem(void)
+{
+	unsigned long reserved_area_start, reserved_area_end;
+	unsigned long destination_address;
+
+	mutex_lock(&fadump_mutex);
+	if (!fw_dump.dump_active) {
+		mutex_unlock(&fadump_mutex);
+		return;
+	}
+
+	destination_address = fdm_active->cpu_state_data.destination_address;
+	fadump_cleanup();
+	mutex_unlock(&fadump_mutex);
+
+	/*
+	 * Save the current reserved memory bounds we will require them
+	 * later for releasing the memory for general use.
+	 */
+	reserved_area_start = fw_dump.reserve_dump_area_start;
+	reserved_area_end = reserved_area_start +
+			fw_dump.reserve_dump_area_size;
+	/*
+	 * Setup reserve_dump_area_start and its size so that we can
+	 * reuse this reserved memory for Re-registration.
+	 */
+	fw_dump.reserve_dump_area_start = destination_address;
+	fw_dump.reserve_dump_area_size = get_dump_area_size();
+
+	fadump_release_memory(reserved_area_start, reserved_area_end);
+	if (fw_dump.cpu_notes_buf) {
+		cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
+					fw_dump.cpu_notes_buf_size);
+		fw_dump.cpu_notes_buf = 0;
+		fw_dump.cpu_notes_buf_size = 0;
+	}
+	/* Initialize the kernel dump memory structure for FAD registration. */
+	init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+}
+
+static ssize_t fadump_release_memory_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	if (!fw_dump.dump_active)
+		return -EPERM;
+
+	if (buf[0] == '1') {
+		/*
+		 * Take away the '/proc/vmcore'. We are releasing the dump
+		 * memory, hence it will not be valid anymore.
+		 */
+		vmcore_cleanup();
+		fadump_invalidate_release_mem();
+
+	} else
+		return -EINVAL;
+	return count;
+}
+
 static ssize_t fadump_enabled_show(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					char *buf)
@@ -1003,10 +1143,13 @@ static int fadump_region_show(struct seq_file *m, void *private)
 	if (!fw_dump.fadump_enabled)
 		return 0;
 
+	mutex_lock(&fadump_mutex);
 	if (fdm_active)
 		fdm_ptr = fdm_active;
-	else
+	else {
+		mutex_unlock(&fadump_mutex);
 		fdm_ptr = &fdm;
+	}
 
 	seq_printf(m,
 			"CPU : [%#016llx-%#016llx] %#llx bytes, "
@@ -1036,7 +1179,7 @@ static int fadump_region_show(struct seq_file *m, void *private)
 	if (!fdm_active ||
 		(fw_dump.reserve_dump_area_start ==
 		fdm_ptr->cpu_state_data.destination_address))
-		return 0;
+		goto out;
 
 	/* Dump is active. Show reserved memory region. */
 	seq_printf(m,
@@ -1048,9 +1191,15 @@ static int fadump_region_show(struct seq_file *m, void *private)
 			fw_dump.reserve_dump_area_start,
 			fdm_ptr->cpu_state_data.destination_address -
 			fw_dump.reserve_dump_area_start);
+out:
+	if (fdm_active)
+		mutex_unlock(&fadump_mutex);
 	return 0;
 }
 
+static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
+						0200, NULL,
+						fadump_release_memory_store);
 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
 						0444, fadump_enabled_show,
 						NULL);
@@ -1091,6 +1240,13 @@ static void fadump_init_files(void)
 	if (!debugfs_file)
 		printk(KERN_ERR "fadump: unable to create debugfs file"
 				" fadump_region\n");
+
+	if (fw_dump.dump_active) {
+		rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+		if (rc)
+			printk(KERN_ERR "fadump: unable to create sysfs file"
+				" fadump_release_mem (%d)\n", rc);
+	}
 	return;
 }
 
@@ -1110,8 +1266,14 @@ int __init setup_fadump(void)
 	 * If dump data is available then see if it is valid and prepare for
 	 * saving it to the disk.
 	 */
-	if (fw_dump.dump_active)
-		process_fadump(fdm_active);
+	if (fw_dump.dump_active) {
+		/*
+		 * if dump process fails then invalidate the registration
+		 * and release memory before proceeding for re-registration.
+		 */
+		if (process_fadump(fdm_active) < 0)
+			fadump_invalidate_release_mem();
+	}
 	/* Initialize the kernel dump memory structure for FAD registration. */
 	else if (fw_dump.reserve_dump_area_size)
 		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);

^ permalink raw reply related

* [RFC PATCH v3 07/10] fadump: Introduce cleanup routine to invalidate /proc/vmcore.
From: Mahesh J Salgaonkar @ 2011-10-31 17:11 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

With the firmware-assisted dump support we don't require a reboot when we
are in second kernel after crash. The second kernel after crash is a normal
kernel boot and has knowledge about entire system RAM with the page tables
initialized for entire system RAM. Hence once the dump is saved to disk, we
can just release the reserved memory area for general use and continue
with second kernel as production kernel.

Hence when we release the reserved memory that contains dump data, the
'/proc/vmcore' will not be valid anymore. Hence this patch introduces
a cleanup routine that invalidates and removes the /proc/vmcore file. This
routine will be invoked before we release the reserved dump memory area.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 fs/proc/vmcore.c |   23 +++++++++++++++++++++++
 1 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cd99bf5..fae5526 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -699,3 +699,26 @@ static int __init vmcore_init(void)
 	return 0;
 }
 module_init(vmcore_init)
+
+/* Cleanup function for vmcore module. */
+void vmcore_cleanup(void)
+{
+	struct list_head *pos, *next;
+
+	if (proc_vmcore) {
+		remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
+		proc_vmcore = NULL;
+	}
+
+	/* clear the vmcore list. */
+	list_for_each_safe(pos, next, &vmcore_list) {
+		struct vmcore *m;
+
+		m = list_entry(pos, struct vmcore, list);
+		list_del(&m->list);
+		kfree(m);
+	}
+	kfree(elfcorebuf);
+	elfcorebuf = NULL;
+}
+EXPORT_SYMBOL_GPL(vmcore_cleanup);

^ permalink raw reply related

* [RFC PATCH v3 06/10] fadump: Add PT_NOTE program header for vmcoreinfo
From: Mahesh J Salgaonkar @ 2011-10-31 17:10 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Introduce a PT_NOTE program header that points to physical address of
vmcoreinfo_note buffer declared in kernel/kexec.c. The vmcoreinfo
note buffer is populated during crash_fadump() at the time of system
crash.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   29 +++++++++++++++++++++++++++++
 1 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index c0ecd6a..4d42fe5 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -773,6 +773,19 @@ static void setup_crash_memory_ranges(void)
 	}
 }
 
+/*
+ * If the given physical address falls within the boot memory region then
+ * return the relocated address that points to the dump region reserved
+ * for saving initial boot memory contents.
+ */
+static inline unsigned long relocate(unsigned long paddr)
+{
+	if (paddr > RMR_START && paddr < fw_dump.boot_memory_size)
+		return fdm.rmr_region.destination_address + paddr;
+	else
+		return paddr;
+}
+
 static int create_elfcore_headers(char *bufp)
 {
 	struct elfhdr *elf;
@@ -805,6 +818,22 @@ static int create_elfcore_headers(char *bufp)
 	/* Increment number of program headers. */
 	(elf->e_phnum)++;
 
+	/* setup ELF PT_NOTE for vmcoreinfo */
+	phdr = (struct elf_phdr *)bufp;
+	bufp += sizeof(struct elf_phdr);
+	phdr->p_type	= PT_NOTE;
+	phdr->p_flags	= 0;
+	phdr->p_vaddr	= 0;
+	phdr->p_align	= 0;
+
+	phdr->p_paddr	= relocate(paddr_vmcoreinfo_note());
+	phdr->p_offset	= phdr->p_paddr;
+	phdr->p_memsz	= vmcoreinfo_max_size;
+	phdr->p_filesz	= vmcoreinfo_max_size;
+
+	/* Increment number of program headers. */
+	(elf->e_phnum)++;
+
 	/* setup PT_LOAD sections. */
 
 	for (i = 0; i < crash_mem_ranges; i++) {

^ permalink raw reply related

* [RFC PATCH v3 05/10] fadump: Convert firmware-assisted cpu state dump data into elf notes.
From: Mahesh J Salgaonkar @ 2011-10-31 17:09 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

When registered for firmware assisted dump on powerpc, firmware preserves
the registers for the active CPUs during a system crash. This patch reads
the cpu register data stored in Firmware-assisted dump format (except for
crashing cpu) and converts it into elf notes and updates the PT_NOTE program
header accordingly. The exact register state for crashing cpu is saved to
fadump crash info structure in scratch area during crash_fadump() and read
during second kernel boot.

Change in v2:
- Moved the crash_fadump() invocation from generic code to panic notifier.
- Introduced cpu_notes_buf_alloc() function to allocate cpu notes buffer
  using get_free_pages(). The reason is, with the use of subsys_initcall
  the setup_fadump() is now called after mem_init(). Hence use of
  get_free_pages() to allocate memory is more appropriate than using
  memblock_alloc().

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h  |   43 +++++
 arch/powerpc/kernel/fadump.c       |  299 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/setup-common.c |    8 +
 arch/powerpc/kernel/traps.c        |    5 +
 4 files changed, 353 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 8c57cdd..4a7d63e 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -53,6 +53,18 @@
 /* Dump status flag */
 #define FADUMP_ERROR_FLAG	0x2000
 
+#define FADUMP_CPU_ID_MASK	((1UL << 32) - 1)
+
+#define CPU_UNKNOWN		(~((u32)0))
+
+/* Utility macros */
+#define SKIP_TO_NEXT_CPU(reg_entry)			\
+({							\
+	while (reg_entry->reg_id != REG_ID("CPUEND"))	\
+		reg_entry++;				\
+	reg_entry++;					\
+})
+
 /* Kernel Dump section info */
 struct fadump_section {
 	u32	request_flag;
@@ -107,6 +119,9 @@ struct fw_dump {
 	unsigned long	reserve_bootvar;
 
 	unsigned long	fadumphdr_addr;
+	unsigned long	cpu_notes_buf;
+	unsigned long	cpu_notes_buf_size;
+
 	int		ibm_configure_kernel_dump;
 
 	unsigned long	fadump_enabled:1;
@@ -131,13 +146,40 @@ static inline u64 str_to_u64(const char *str)
 	return val;
 }
 #define STR_TO_HEX(x)	str_to_u64(x)
+#define REG_ID(x)	str_to_u64(x)
 
 #define FADUMP_CRASH_INFO_MAGIC		STR_TO_HEX("FADMPINF")
+#define REGSAVE_AREA_MAGIC		STR_TO_HEX("REGSAVE")
+
+/* The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND".
+ */
+
+/* Register save area header. */
+struct fadump_reg_save_area_header {
+	u64		magic_number;
+	u32		version;
+	u32		num_cpu_offset;
+};
+
+/* Register entry. */
+struct fadump_reg_entry {
+	u64		reg_id;
+	u64		reg_value;
+};
 
 /* fadump crash info structure */
 struct fadump_crash_info_header {
 	u64		magic_number;
 	u64		elfcorehdr_addr;
+	u32		crashing_cpu;
+	struct pt_regs	regs;
+	struct cpumask	cpu_online_mask;
 };
 
 /* Crash memory ranges */
@@ -153,6 +195,7 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
 extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
+extern void crash_fadump(struct pt_regs *, const char *);
 #else	/* CONFIG_FA_DUMP */
 static inline int is_fadump_active(void) { return 0; }
 #endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 7bfa67b..c0ecd6a 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -244,6 +244,7 @@ static unsigned long get_dump_area_size(void)
 	size += fw_dump.boot_memory_size;
 	size += sizeof(struct fadump_crash_info_header);
 	size += sizeof(struct elfhdr); /* ELF core header.*/
+	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
 	/* Program headers for crash memory regions. */
 	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
 
@@ -397,6 +398,269 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
 	}
 }
 
+void crash_fadump(struct pt_regs *regs, const char *str)
+{
+	struct fadump_crash_info_header *fdh = NULL;
+
+	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+		return;
+
+	fdh = __va(fw_dump.fadumphdr_addr);
+	crashing_cpu = smp_processor_id();
+	fdh->crashing_cpu = crashing_cpu;
+	crash_save_vmcoreinfo();
+
+	if (regs)
+		fdh->regs = *regs;
+	else
+		ppc_save_regs(&fdh->regs);
+
+	fdh->cpu_online_mask = *cpu_online_mask;
+
+	/* Call ibm,os-term rtas call to trigger firmware assisted dump */
+	rtas_os_term((char *)str);
+}
+
+#define GPR_MASK	0xffffff0000000000
+static inline int gpr_index(u64 id)
+{
+	int i = -1;
+	char str[3];
+
+	if ((id & GPR_MASK) == REG_ID("GPR")) {
+		/* get the digits at the end */
+		id &= ~GPR_MASK;
+		id >>= 24;
+		str[2] = '\0';
+		str[1] = id & 0xff;
+		str[0] = (id >> 8) & 0xff;
+		sscanf(str, "%d", &i);
+		if (i > 31)
+			i = -1;
+	}
+	return i;
+}
+
+static inline void set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+	int i;
+
+	i = gpr_index(reg_id);
+	if (i >= 0)
+		regs->gpr[i] = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("NIA"))
+		regs->nip = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("MSR"))
+		regs->msr = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("CTR"))
+		regs->ctr = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("LR"))
+		regs->link = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("XER"))
+		regs->xer = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("CR"))
+		regs->ccr = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("DAR"))
+		regs->dar = (unsigned long)reg_val;
+	else if (reg_id == REG_ID("DSISR"))
+		regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct fadump_reg_entry*
+read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	while (reg_entry->reg_id != REG_ID("CPUEND")) {
+		set_regval(regs, reg_entry->reg_id, reg_entry->reg_value);
+		reg_entry++;
+	}
+	reg_entry++;
+	return reg_entry;
+}
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+			    size_t data_len)
+{
+	struct elf_note note;
+
+	note.n_namesz = strlen(name) + 1;
+	note.n_descsz = data_len;
+	note.n_type   = type;
+	memcpy(buf, &note, sizeof(note));
+	buf += (sizeof(note) + 3)/4;
+	memcpy(buf, name, note.n_namesz);
+	buf += (note.n_namesz + 3)/4;
+	memcpy(buf, data, note.n_descsz);
+	buf += (note.n_descsz + 3)/4;
+
+	return buf;
+}
+
+static void final_note(u32 *buf)
+{
+	struct elf_note note;
+
+	note.n_namesz = 0;
+	note.n_descsz = 0;
+	note.n_type   = 0;
+	memcpy(buf, &note, sizeof(note));
+}
+
+static u32 *regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+	struct elf_prstatus prstatus;
+
+	memset(&prstatus, 0, sizeof(prstatus));
+	/*
+	 * FIXME: How do I get the PID? Do I really need it?
+	 * prstatus.pr_pid = ????
+	 */
+	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+				&prstatus, sizeof(prstatus));
+	return buf;
+}
+
+static void update_elfcore_header(char *bufp)
+{
+	struct elfhdr *elf;
+	struct elf_phdr *phdr;
+
+	elf = (struct elfhdr *)bufp;
+	bufp += sizeof(struct elfhdr);
+
+	/* First note is a place holder for cpu notes info. */
+	phdr = (struct elf_phdr *)bufp;
+
+	if (phdr->p_type == PT_NOTE) {
+		phdr->p_paddr = fw_dump.cpu_notes_buf;
+		phdr->p_offset	= phdr->p_paddr;
+		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
+		phdr->p_memsz = fw_dump.cpu_notes_buf_size;
+	}
+	return;
+}
+
+static void *cpu_notes_buf_alloc(unsigned long size)
+{
+	void *vaddr;
+	struct page *page;
+	unsigned long order, count, i;
+
+	order = get_order(size);
+	vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+	if (!vaddr)
+		return NULL;
+
+	count = 1 << order;
+	page = virt_to_page(vaddr);
+	for (i = 0; i < count; i++)
+		SetPageReserved(page + i);
+	return vaddr;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init build_cpu_notes(const struct fadump_mem_struct *fdm)
+{
+	struct fadump_reg_save_area_header *reg_header;
+	struct fadump_reg_entry *reg_entry;
+	struct fadump_crash_info_header *fdh = NULL;
+	void *vaddr;
+	unsigned long addr;
+	u32 num_cpus, *note_buf;
+	struct pt_regs regs;
+	int i, rc = 0, cpu = 0;
+
+	if (!fdm->cpu_state_data.bytes_dumped)
+		return -EINVAL;
+
+	addr = fdm->cpu_state_data.destination_address;
+	vaddr = __va(addr);
+
+	reg_header = vaddr;
+	if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
+		printk(KERN_ERR "Unable to read register save area.\n");
+		return -ENOENT;
+	}
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("Magic Number: %llx\n", reg_header->magic_number);
+	pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
+
+	vaddr += reg_header->num_cpu_offset;
+	num_cpus = *((u32 *)(vaddr));
+	pr_debug("NumCpus     : %u\n", num_cpus);
+	vaddr += sizeof(u32);
+	reg_entry = (struct fadump_reg_entry *)vaddr;
+
+	/* Allocate buffer to hold cpu crash notes. */
+	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
+	note_buf = cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
+	if (!note_buf) {
+		printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
+			"cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+		return -ENOMEM;
+	}
+	fw_dump.cpu_notes_buf = __pa(note_buf);
+
+	pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+			(num_cpus * sizeof(note_buf_t)), note_buf);
+
+	if (fw_dump.fadumphdr_addr)
+		fdh = __va(fw_dump.fadumphdr_addr);
+
+	for (i = 0; i < num_cpus; i++) {
+		if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
+			printk(KERN_ERR "Unable to read CPU state data\n");
+			rc = -ENOENT;
+			goto error_out;
+		}
+		/* Lower 4 bytes of reg_value contains logical cpu id */
+		cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
+		if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+			SKIP_TO_NEXT_CPU(reg_entry);
+			continue;
+		}
+		pr_debug("Reading register data for cpu %d...\n", cpu);
+		if (fdh && fdh->crashing_cpu == cpu) {
+			regs = fdh->regs;
+			note_buf = regs_to_elf_notes(note_buf, &regs);
+			SKIP_TO_NEXT_CPU(reg_entry);
+		} else {
+			reg_entry++;
+			reg_entry = read_registers(reg_entry, &regs);
+			note_buf = regs_to_elf_notes(note_buf, &regs);
+		}
+	}
+	final_note(note_buf);
+
+	pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+							fdh->elfcorehdr_addr);
+	update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+	return 0;
+
+error_out:
+	memblock_free(fw_dump.cpu_notes_buf, fw_dump.cpu_notes_buf_size);
+	fw_dump.cpu_notes_buf = 0;
+	fw_dump.cpu_notes_buf_size = 0;
+	return rc;
+
+}
+
 /*
  * Validate and process the dump data stored by firmware before exporting
  * it through '/proc/vmcore'.
@@ -404,18 +668,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
 static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
 {
 	struct fadump_crash_info_header *fdh;
+	int rc = 0;
 
 	if (!fdm_active || !fw_dump.fadumphdr_addr)
 		return -EINVAL;
 
 	/* Check if the dump data is valid. */
 	if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
+			(fdm_active->cpu_state_data.error_flags != 0) ||
 			(fdm_active->rmr_region.error_flags != 0)) {
 		printk(KERN_ERR "Dump taken by platform is not valid\n");
 		return -EINVAL;
 	}
-	if (fdm_active->rmr_region.bytes_dumped !=
-			fdm_active->rmr_region.source_len) {
+	if ((fdm_active->rmr_region.bytes_dumped !=
+			fdm_active->rmr_region.source_len) ||
+			!fdm_active->cpu_state_data.bytes_dumped) {
 		printk(KERN_ERR "Dump taken by platform is incomplete\n");
 		return -EINVAL;
 	}
@@ -427,6 +694,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
 		return -EINVAL;
 	}
 
+	rc = build_cpu_notes(fdm_active);
+	if (rc)
+		return rc;
+
 	/*
 	 * We are done validating dump info and elfcore header is now ready
 	 * to be exported. set elfcorehdr_addr so that vmcore module will
@@ -512,6 +783,28 @@ static int create_elfcore_headers(char *bufp)
 	elf = (struct elfhdr *)bufp;
 	bufp += sizeof(struct elfhdr);
 
+	/*
+	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
+	 * will be populated during second kernel boot after crash. Hence
+	 * this PT_NOTE will always be the first elf note.
+	 *
+	 * NOTE: Any new ELF note addition should be placed after this note.
+	 */
+	phdr = (struct elf_phdr *)bufp;
+	bufp += sizeof(struct elf_phdr);
+	phdr->p_type	= PT_NOTE;
+	phdr->p_flags	= 0;
+	phdr->p_vaddr	= 0;
+	phdr->p_align	= 0;
+
+	phdr->p_offset	= 0;
+	phdr->p_paddr	= 0;
+	phdr->p_filesz	= 0;
+	phdr->p_memsz	= 0;
+
+	/* Increment number of program headers. */
+	(elf->e_phnum)++;
+
 	/* setup PT_LOAD sections. */
 
 	for (i = 0; i < crash_mem_ranges; i++) {
@@ -563,6 +856,8 @@ static unsigned long init_fadump_header(unsigned long addr)
 	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
 	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
 	fdh->elfcorehdr_addr = addr;
+	/* We will set the crashing cpu id in crash_fadump() during crash. */
+	fdh->crashing_cpu = CPU_UNKNOWN;
 
 	return addr;
 }
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b1d738d..ce35aaf 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -61,6 +61,7 @@
 #include <asm/xmon.h>
 #include <asm/cputhreads.h>
 #include <mm/mmu_decl.h>
+#include <asm/fadump.h>
 
 #include "setup.h"
 
@@ -639,6 +640,13 @@ EXPORT_SYMBOL(check_legacy_ioport);
 static int ppc_panic_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
 {
+#ifdef CONFIG_FA_DUMP
+	/*
+	 * If firmware-assisted dump has been registered then trigger
+	 * firmware-assisted dump and let firmware handle everything else.
+	 */
+	crash_fadump(NULL, ptr);
+#endif
 	ppc_md.panic(ptr);  /* May not return */
 	return NOTIFY_DONE;
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f19d977..1508532 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -57,6 +57,7 @@
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
 #include <asm/rio.h>
+#include <asm/fadump.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
 int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -160,6 +161,10 @@ int die(const char *str, struct pt_regs *regs, long err)
 	add_taint(TAINT_DIE);
 	raw_spin_unlock_irqrestore(&die.lock, flags);
 
+#ifdef CONFIG_FA_DUMP
+	crash_fadump(regs, str);
+#endif
+
 	if (kexec_should_crash(current) ||
 		kexec_sr_activated(smp_processor_id()))
 		crash_kexec(regs);

^ permalink raw reply related

* [RFC PATCH v3 04/10] fadump: Initialize elfcore header and add PT_LOAD program headers.
From: Mahesh J Salgaonkar @ 2011-10-31 17:08 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Build the crash memory range list by traversing through system memory during
the first kernel before we register for firmware-assisted dump. After the
successful dump registration, initialize the elfcore header and populate
PT_LOAD program headers with crash memory ranges. The elfcore header is
saved in the scratch area within the reserved memory. The scratch area starts
at the end of the memory reserved for saving RMR region contents. The
scratch area contains fadump crash info structure that contains magic number
for fadump validation and physical address where the elfcore header can be
found. This structure will also be used to pass some important crash info
data to the second kernel which will help second kernel to populate ELF core
header with correct data before it gets exported through /proc/vmcore. Since
the firmware preserves the entire partition memory at the time of crash the
contents of the scratch area will be preserved till second kernel boot.

NOTE: The current design does not address the possibility of adding new
fields to this structure in the future without affecting compatibility.
Coming up with a better approach to address this is on the TODO list.

Reserved dump area start => +-------------------------------------+
                            |  CPU state dump data                |
                            +-------------------------------------+
                            |  HPTE region data                   |
                            +-------------------------------------+
                            |  RMR region data                    |
Scratch area start       => +-------------------------------------+
                            |  fadump crash info structure {      |
                            |     magic number                    |
                     +------|---- elfcorehdr_addr                 |
                     |      |  }                                  |
                     +----> +-------------------------------------+
                            |  ELF core header                    |
Reserved dump area end   => +-------------------------------------+

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |   37 +++++++
 arch/powerpc/kernel/fadump.c      |  206 +++++++++++++++++++++++++++++++++++++
 include/linux/crash_dump.h        |    1 
 include/linux/memblock.h          |    1 
 kernel/crash_dump.c               |   33 ++++++
 5 files changed, 276 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 3b2f8cc..8c57cdd 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -50,6 +50,9 @@
 #define FADUMP_UNREGISTER	2
 #define FADUMP_INVALIDATE	3
 
+/* Dump status flag */
+#define FADUMP_ERROR_FLAG	0x2000
+
 /* Kernel Dump section info */
 struct fadump_section {
 	u32	request_flag;
@@ -103,6 +106,7 @@ struct fw_dump {
 	/* cmd line option during boot */
 	unsigned long	reserve_bootvar;
 
+	unsigned long	fadumphdr_addr;
 	int		ibm_configure_kernel_dump;
 
 	unsigned long	fadump_enabled:1;
@@ -111,6 +115,39 @@ struct fw_dump {
 	unsigned long	dump_registered:1;
 };
 
+/*
+ * Copy the ascii values for first 8 characters from a string into u64
+ * variable at their respective indexes.
+ * e.g.
+ *  The string "FADMPINF" will be converted into 0x4641444d50494e46
+ */
+static inline u64 str_to_u64(const char *str)
+{
+	u64 val = 0;
+	int i;
+
+	for (i = 0; i < sizeof(val); i++)
+		val = (*str) ? (val << 8) | *str++ : val << 8;
+	return val;
+}
+#define STR_TO_HEX(x)	str_to_u64(x)
+
+#define FADUMP_CRASH_INFO_MAGIC		STR_TO_HEX("FADMPINF")
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+	u64		magic_number;
+	u64		elfcorehdr_addr;
+};
+
+/* Crash memory ranges */
+#define INIT_CRASHMEM_RANGES	(INIT_MEMBLOCK_REGIONS + 2)
+
+struct fad_crash_memory_ranges {
+	unsigned long long	base;
+	unsigned long long	size;
+};
+
 extern int early_init_dt_scan_fw_dump(unsigned long node,
 		const char *uname, int depth, void *data);
 extern int fadump_reserve_mem(void);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ed38f86..7bfa67b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -32,6 +32,7 @@
 #include <linux/delay.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/crash_dump.h>
 
 #include <asm/page.h>
 #include <asm/prom.h>
@@ -53,6 +54,8 @@ static struct fadump_mem_struct fdm;
 static const struct fadump_mem_struct *fdm_active;
 
 static DEFINE_MUTEX(fadump_mutex);
+struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+int crash_mem_ranges;
 
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
@@ -239,6 +242,10 @@ static unsigned long get_dump_area_size(void)
 	size += fw_dump.cpu_state_data_size;
 	size += fw_dump.hpte_region_size;
 	size += fw_dump.boot_memory_size;
+	size += sizeof(struct fadump_crash_info_header);
+	size += sizeof(struct elfhdr); /* ELF core header.*/
+	/* Program headers for crash memory regions. */
+	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
 
 	size = PAGE_ALIGN(size);
 	return size;
@@ -304,6 +311,12 @@ int __init fadump_reserve_mem(void)
 				"for saving crash dump\n",
 				(unsigned long)(size >> 20),
 				(unsigned long)(base >> 20));
+
+		fw_dump.fadumphdr_addr =
+				fdm_active->rmr_region.destination_address +
+				fdm_active->rmr_region.source_len;
+		pr_debug("fadumphdr_addr = %p\n",
+				(void *) fw_dump.fadumphdr_addr);
 	} else {
 		/* Reserve the memory at the top of memory. */
 		size = get_dump_area_size();
@@ -384,8 +397,181 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
 	}
 }
 
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+{
+	struct fadump_crash_info_header *fdh;
+
+	if (!fdm_active || !fw_dump.fadumphdr_addr)
+		return -EINVAL;
+
+	/* Check if the dump data is valid. */
+	if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
+			(fdm_active->rmr_region.error_flags != 0)) {
+		printk(KERN_ERR "Dump taken by platform is not valid\n");
+		return -EINVAL;
+	}
+	if (fdm_active->rmr_region.bytes_dumped !=
+			fdm_active->rmr_region.source_len) {
+		printk(KERN_ERR "Dump taken by platform is incomplete\n");
+		return -EINVAL;
+	}
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fw_dump.fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		printk(KERN_ERR "Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * We are done validating dump info and elfcore header is now ready
+	 * to be exported. set elfcorehdr_addr so that vmcore module will
+	 * export the elfcore header through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return 0;
+}
+
+static inline void add_crash_memory(unsigned long long base,
+					unsigned long long end)
+{
+	if (base == end)
+		return;
+
+	pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+		crash_mem_ranges, base, end - 1, (end - base));
+	crash_memory_ranges[crash_mem_ranges].base = base;
+	crash_memory_ranges[crash_mem_ranges].size = end - base;
+	crash_mem_ranges++;
+}
+
+static void exclude_reserved_area(unsigned long long start,
+					unsigned long long end)
+{
+	unsigned long long ra_start, ra_end;
+
+	ra_start = fw_dump.reserve_dump_area_start;
+	ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+	if ((ra_start < end) && (ra_end > start)) {
+		if ((start < ra_start) && (end > ra_end)) {
+			add_crash_memory(start, ra_start);
+			add_crash_memory(ra_end, end);
+		} else if (start < ra_start) {
+			add_crash_memory(start, ra_start);
+		} else if (ra_end < end) {
+			add_crash_memory(ra_end, end);
+		}
+	} else
+		add_crash_memory(start, end);
+}
+
+/*
+ * Traverse through memblock structure and setup crash memory ranges. These
+ * ranges will be used to create PT_LOAD program headers in elfcore header.
+ */
+static void setup_crash_memory_ranges(void)
+{
+	struct memblock_region *reg;
+	unsigned long long start, end;
+
+	pr_debug("Setup crash memory ranges.\n");
+	crash_mem_ranges = 0;
+	/*
+	 * add the first memory chunk (RMR_START through boot_memory_size) as
+	 * a separate memory chunk. The reason is that, at crash time, firmware
+	 * will move the content of this memory chunk to a different location
+	 * specified during fadump registration. We need to create a separate
+	 * program header for this chunk with the correct offset.
+	 */
+	add_crash_memory(RMR_START, fw_dump.boot_memory_size);
+
+	for_each_memblock(memory, reg) {
+		start = (unsigned long long)reg->base;
+		end = start + (unsigned long long)reg->size;
+		if (start == RMR_START && end >= fw_dump.boot_memory_size)
+			start = fw_dump.boot_memory_size;
+
+		/* add this range excluding the reserved dump area. */
+		exclude_reserved_area(start, end);
+	}
+}
+
+static int create_elfcore_headers(char *bufp)
+{
+	struct elfhdr *elf;
+	struct elf_phdr *phdr;
+	int i;
+
+	init_elfcore_header(bufp);
+	elf = (struct elfhdr *)bufp;
+	bufp += sizeof(struct elfhdr);
+
+	/* setup PT_LOAD sections. */
+
+	for (i = 0; i < crash_mem_ranges; i++) {
+		unsigned long long mbase, msize;
+		mbase = crash_memory_ranges[i].base;
+		msize = crash_memory_ranges[i].size;
+
+		if (!msize)
+			continue;
+
+		phdr = (struct elf_phdr *)bufp;
+		bufp += sizeof(struct elf_phdr);
+		phdr->p_type	= PT_LOAD;
+		phdr->p_flags	= PF_R|PF_W|PF_X;
+		phdr->p_offset	= mbase;
+
+		if (mbase == RMR_START) {
+			/*
+			 * The entire RMR region will be moved by firmware
+			 * to the specified destination_address. Hence set
+			 * the correct offset.
+			 */
+			phdr->p_offset = fdm.rmr_region.destination_address;
+		}
+
+		phdr->p_paddr = mbase;
+		phdr->p_vaddr = (unsigned long)__va(mbase);
+		phdr->p_filesz = msize;
+		phdr->p_memsz = msize;
+		phdr->p_align = 0;
+
+		/* Increment number of program headers. */
+		(elf->e_phnum)++;
+	}
+	return 0;
+}
+
+static unsigned long init_fadump_header(unsigned long addr)
+{
+	struct fadump_crash_info_header *fdh;
+
+	if (!addr)
+		return 0;
+
+	fw_dump.fadumphdr_addr = addr;
+	fdh = __va(addr);
+	addr += sizeof(struct fadump_crash_info_header);
+
+	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
+	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
+	fdh->elfcorehdr_addr = addr;
+
+	return addr;
+}
+
 static void register_fadump(void)
 {
+	unsigned long addr;
+	void *vaddr;
+
 	/*
 	 * If no memory is reserved then we can not register for firmware-
 	 * assisted dump.
@@ -393,6 +579,16 @@ static void register_fadump(void)
 	if (!fw_dump.reserve_dump_area_size)
 		return;
 
+	setup_crash_memory_ranges();
+
+	addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
+	/* Initialize fadump crash info header. */
+	addr = init_fadump_header(addr);
+	vaddr = __va(addr);
+
+	pr_debug("Creating ELF core headers at %#016lx\n", addr);
+	create_elfcore_headers(vaddr);
+
 	/* register the future kernel dump with firmware. */
 	register_fw_dump(&fdm);
 }
@@ -586,11 +782,17 @@ int __init setup_fadump(void)
 	}
 
 	fadump_show_config();
+	/*
+	 * If dump data is available then see if it is valid and prepare for
+	 * saving it to the disk.
+	 */
+	if (fw_dump.dump_active)
+		process_fadump(fdm_active);
 	/* Initialize the kernel dump memory structure for FAD registration. */
-	if (fw_dump.reserve_dump_area_size)
+	else if (fw_dump.reserve_dump_area_size)
 		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
-	fadump_init_files();
 
+	fadump_init_files();
 	return 1;
 }
 subsys_initcall(setup_fadump);
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7405407..14627d4 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -13,6 +13,7 @@ extern unsigned long long elfcorehdr_addr;
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
+extern int init_elfcore_header(char *);
 
 /* Architecture code defines this if there are other possible ELF
  * machine types, e.g. on bi-arch capable hardware. */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7525e38..63ae7a0 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -152,6 +152,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 	     region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);	\
 	     region++)
 
+#define memblock_num_regions(memblock_type)	(memblock.memblock_type.cnt)
 
 #ifdef ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __init
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 5f85690..ce93529 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -4,6 +4,10 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS 0
+#endif
+
 /*
  * If we have booted due to a crash, max_pfn will be a very low value. We need
  * to know the amount of memory that the previous kernel used.
@@ -32,3 +36,32 @@ static int __init setup_elfcorehdr(char *arg)
 	return end > arg ? 0 : -EINVAL;
 }
 early_param("elfcorehdr", setup_elfcorehdr);
+
+int init_elfcore_header(char *bufp)
+{
+	struct elfhdr *elf;
+
+	elf = (struct elfhdr *) bufp;
+	bufp += sizeof(struct elfhdr);
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS]	= ELF_CLASS;
+	elf->e_ident[EI_DATA]	= ELF_DATA;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	elf->e_type	= ET_CORE;
+	elf->e_machine	= ELF_ARCH;
+	elf->e_version	= EV_CURRENT;
+	elf->e_entry	= 0;
+	elf->e_phoff	= sizeof(struct elfhdr);
+	elf->e_shoff	= 0;
+	elf->e_flags	= ELF_CORE_EFLAGS;
+	elf->e_ehsize	= sizeof(struct elfhdr);
+	elf->e_phentsize = sizeof(struct elf_phdr);
+	elf->e_phnum	= 0;
+	elf->e_shentsize = 0;
+	elf->e_shnum	= 0;
+	elf->e_shstrndx	= 0;
+
+	return 0;
+}

^ permalink raw reply related

* [RFC PATCH v3 03/10] fadump: Register for firmware assisted dump.
From: Mahesh J Salgaonkar @ 2011-10-31 17:07 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch registers for firmware-assisted dump using rtas token
ibm,configure-kernel-dump. During registration firmware is informed about
the reserved area where it saves the CPU state data, HPTE table and contents
of RMR region at the time of kernel crash. Apart from this, firmware also
preserves the contents of entire partition memory even if it is not specified
during registration.

This patch also populates sysfs files under /sys/kernel to display
fadump status and reserved memory regions.

Change in v3:
- Re-factored the implementation to work with kdump service start/stop.
  Introduce fadump_registered sysfs control file which will be used by
  kdump init scripts to start/stop firmware assisted dump. echo 1 to
  /sys/kernel/fadump_registered file for fadump registration and
  echo 0 to /sys/kernel/fadump_registered file for fadump un-registration.
- Introduced the locking mechanism to handle simultaneous writes to
  /sys/kernel/fadump_registered file.

Change in v2:
- Removed a few debug print statements.
- Moved the setup_fadump() call from setup_system() and now calling it
  subsys_initcall.
- Moved fadump_region attribute under debugfs.
- Clear the TCE entries if firmware assisted dump is active.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |   57 ++++++
 arch/powerpc/kernel/fadump.c      |  352 +++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/iommu.c       |    8 +
 arch/powerpc/mm/hash_utils_64.c   |   11 +
 4 files changed, 424 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 0b040c1..3b2f8cc 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -42,6 +42,58 @@
 #define FADUMP_HPTE_REGION	0x0002
 #define FADUMP_REAL_MODE_REGION	0x0011
 
+/* Dump request flag */
+#define FADUMP_REQUEST_FLAG	0x00000001
+
+/* FAD commands */
+#define FADUMP_REGISTER		1
+#define FADUMP_UNREGISTER	2
+#define FADUMP_INVALIDATE	3
+
+/* Kernel Dump section info */
+struct fadump_section {
+	u32	request_flag;
+	u16	source_data_type;
+	u16	error_flags;
+	u64	source_address;
+	u64	source_len;
+	u64	bytes_dumped;
+	u64	destination_address;
+};
+
+/* ibm,configure-kernel-dump header. */
+struct fadump_section_header {
+	u32	dump_format_version;
+	u16	dump_num_sections;
+	u16	dump_status_flag;
+	u32	offset_first_dump_section;
+
+	/* Fields for disk dump option. */
+	u32	dd_block_size;
+	u64	dd_block_offset;
+	u64	dd_num_blocks;
+	u32	dd_offset_disk_path;
+
+	/* Maximum time allowed to prevent an automatic dump-reboot. */
+	u32	max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ */
+struct fadump_mem_struct {
+	struct fadump_section_header	header;
+
+	/* Kernel dump sections */
+	struct fadump_section		cpu_state_data;
+	struct fadump_section		hpte_region;
+	struct fadump_section		rmr_region;
+};
+
+/* Firmware-assisted dump configuration details. */
 struct fw_dump {
 	unsigned long	cpu_state_data_size;
 	unsigned long	hpte_region_size;
@@ -56,10 +108,15 @@ struct fw_dump {
 	unsigned long	fadump_enabled:1;
 	unsigned long	fadump_supported:1;
 	unsigned long	dump_active:1;
+	unsigned long	dump_registered:1;
 };
 
 extern int early_init_dt_scan_fw_dump(unsigned long node,
 		const char *uname, int depth, void *data);
 extern int fadump_reserve_mem(void);
+extern int setup_fadump(void);
+extern int is_fadump_active(void);
+#else	/* CONFIG_FA_DUMP */
+static inline int is_fadump_active(void) { return 0; }
 #endif
 #endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 05dffc0..ed38f86 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -29,6 +29,9 @@
 
 #include <linux/string.h>
 #include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <asm/page.h>
 #include <asm/prom.h>
@@ -46,6 +49,10 @@ struct dump_section {
 } __packed;
 
 static struct fw_dump fw_dump;
+static struct fadump_mem_struct fdm;
+static const struct fadump_mem_struct *fdm_active;
+
+static DEFINE_MUTEX(fadump_mutex);
 
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
@@ -74,7 +81,8 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
 	 * The 'ibm,kernel-dump' rtas node is present only if there is
 	 * dump data waiting for us.
 	 */
-	if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL))
+	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+	if (fdm_active)
 		fw_dump.dump_active = 1;
 
 	/* Get the sizes required to store dump data for the firmware provided
@@ -101,6 +109,85 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
 	return 1;
 }
 
+int is_fadump_active(void)
+{
+	return fw_dump.dump_active;
+}
+
+/* Print firmware assisted dump configurations for debugging purpose. */
+static void fadump_show_config(void)
+{
+	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
+			(fw_dump.fadump_supported ? "present" : "no support"));
+
+	if (!fw_dump.fadump_supported)
+		return;
+
+	pr_debug("Fadump enabled    : %s\n",
+				(fw_dump.fadump_enabled ? "yes" : "no"));
+	pr_debug("Dump Active       : %s\n",
+				(fw_dump.dump_active ? "yes" : "no"));
+	pr_debug("Dump section sizes:\n");
+	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
+	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
+	pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
+}
+
+static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
+				unsigned long addr)
+{
+	if (!fdm)
+		return 0;
+
+	memset(fdm, 0, sizeof(struct fadump_mem_struct));
+	addr = addr & PAGE_MASK;
+
+	fdm->header.dump_format_version = 0x00000001;
+	fdm->header.dump_num_sections = 3;
+	fdm->header.dump_status_flag = 0;
+	fdm->header.offset_first_dump_section =
+		(u32)offsetof(struct fadump_mem_struct, cpu_state_data);
+
+	/*
+	 * Fields for disk dump option.
+	 * We are not using disk dump option, hence set these fields to 0.
+	 */
+	fdm->header.dd_block_size = 0;
+	fdm->header.dd_block_offset = 0;
+	fdm->header.dd_num_blocks = 0;
+	fdm->header.dd_offset_disk_path = 0;
+
+	/* set 0 to disable an automatic dump-reboot. */
+	fdm->header.max_time_auto = 0;
+
+	/* Kernel dump sections */
+	/* cpu state data section. */
+	fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
+	fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
+	fdm->cpu_state_data.source_address = 0;
+	fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
+	fdm->cpu_state_data.destination_address = addr;
+	addr += fw_dump.cpu_state_data_size;
+
+	/* hpte region section */
+	fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
+	fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
+	fdm->hpte_region.source_address = 0;
+	fdm->hpte_region.source_len = fw_dump.hpte_region_size;
+	fdm->hpte_region.destination_address = addr;
+	addr += fw_dump.hpte_region_size;
+
+	/* RMR region section */
+	fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
+	fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
+	fdm->rmr_region.source_address = RMR_START;
+	fdm->rmr_region.source_len = fw_dump.boot_memory_size;
+	fdm->rmr_region.destination_address = addr;
+	addr += fw_dump.boot_memory_size;
+
+	return addr;
+}
+
 /**
  * calculate_reserve_size() - reserve variable boot area 5% of System RAM
  *
@@ -170,8 +257,15 @@ int __init fadump_reserve_mem(void)
 		fw_dump.fadump_enabled = 0;
 		return 0;
 	}
-	/* Initialize boot memory size */
-	fw_dump.boot_memory_size = calculate_reserve_size();
+	/*
+	 * Initialize boot memory size.
+	 * If a dump is active then the size was already calculated by the
+	 * first kernel.
+	 */
+	if (fdm_active)
+		fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
+	else
+		fw_dump.boot_memory_size = calculate_reserve_size();
 
 	/*
 	 * Calculate the memory boundary.
@@ -248,3 +342,255 @@ static int __init early_fadump_reserve_mem(char *p)
 	return 0;
 }
 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+static void register_fw_dump(struct fadump_mem_struct *fdm)
+{
+	int rc;
+	unsigned int wait_time;
+
+	pr_debug("Registering for firmware-assisted kernel dump...\n");
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+			FADUMP_REGISTER, fdm,
+			sizeof(struct fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+
+	} while (wait_time);
+
+	switch (rc) {
+	case -1:
+		printk(KERN_ERR "Failed to register firmware-assisted kernel"
+			" dump. Hardware Error(%d).\n", rc);
+		break;
+	case -3:
+		printk(KERN_ERR "Failed to register firmware-assisted kernel"
+			" dump. Parameter Error(%d).\n", rc);
+		break;
+	case -9:
+		printk(KERN_ERR "firmware-assisted kernel dump is already "
+			" registered.");
+		fw_dump.dump_registered = 1;
+		break;
+	case 0:
+		printk(KERN_INFO "firmware-assisted kernel dump registration"
+			" is successful\n");
+		fw_dump.dump_registered = 1;
+		break;
+	}
+}
+
+static void register_fadump(void)
+{
+	/*
+	 * If no memory is reserved then we can not register for firmware-
+	 * assisted dump.
+	 */
+	if (!fw_dump.reserve_dump_area_size)
+		return;
+
+	/* register the future kernel dump with firmware. */
+	register_fw_dump(&fdm);
+}
+
+static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+{
+	int rc = 0;
+	unsigned int wait_time;
+
+	pr_debug("Un-register firmware-assisted dump\n");
+
+	/* TODO: Add upper time limit for the delay */
+	do {
+		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+			FADUMP_UNREGISTER, fdm,
+			sizeof(struct fadump_mem_struct));
+
+		wait_time = rtas_busy_delay_time(rc);
+		if (wait_time)
+			mdelay(wait_time);
+	} while (wait_time);
+
+	if (rc) {
+		printk(KERN_ERR "Failed to un-register firmware-assisted dump."
+			" unexpected error(%d).\n", rc);
+		return rc;
+	}
+	fw_dump.dump_registered = 0;
+	return 0;
+}
+
+static ssize_t fadump_enabled_show(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
+}
+
+static ssize_t fadump_register_show(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "%d\n", fw_dump.dump_registered);
+}
+
+static ssize_t fadump_register_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	int ret = 0;
+
+	if (!fw_dump.fadump_enabled || fdm_active)
+		return -EPERM;
+
+	mutex_lock(&fadump_mutex);
+
+	switch (buf[0]) {
+	case '0':
+		if (fw_dump.dump_registered == 0) {
+			ret = -EINVAL;
+			goto unlock_out;
+		}
+		/* Un-register Firmware-assisted dump */
+		fadump_unregister_dump(&fdm);
+		break;
+	case '1':
+		if (fw_dump.dump_registered == 1) {
+			ret = -EINVAL;
+			goto unlock_out;
+		}
+		/* Register Firmware-assisted dump */
+		register_fadump();
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+unlock_out:
+	mutex_unlock(&fadump_mutex);
+	return ret < 0 ? ret : count;
+}
+
+static int fadump_region_show(struct seq_file *m, void *private)
+{
+	const struct fadump_mem_struct *fdm_ptr;
+
+	if (!fw_dump.fadump_enabled)
+		return 0;
+
+	if (fdm_active)
+		fdm_ptr = fdm_active;
+	else
+		fdm_ptr = &fdm;
+
+	seq_printf(m,
+			"CPU : [%#016llx-%#016llx] %#llx bytes, "
+			"Dumped: %#llx\n",
+			fdm_ptr->cpu_state_data.destination_address,
+			fdm_ptr->cpu_state_data.destination_address +
+			fdm_ptr->cpu_state_data.source_len - 1,
+			fdm_ptr->cpu_state_data.source_len,
+			fdm_ptr->cpu_state_data.bytes_dumped);
+	seq_printf(m,
+			"HPTE: [%#016llx-%#016llx] %#llx bytes, "
+			"Dumped: %#llx\n",
+			fdm_ptr->hpte_region.destination_address,
+			fdm_ptr->hpte_region.destination_address +
+			fdm_ptr->hpte_region.source_len - 1,
+			fdm_ptr->hpte_region.source_len,
+			fdm_ptr->hpte_region.bytes_dumped);
+	seq_printf(m,
+			"DUMP: [%#016llx-%#016llx] %#llx bytes, "
+			"Dumped: %#llx\n",
+			fdm_ptr->rmr_region.destination_address,
+			fdm_ptr->rmr_region.destination_address +
+			fdm_ptr->rmr_region.source_len - 1,
+			fdm_ptr->rmr_region.source_len,
+			fdm_ptr->rmr_region.bytes_dumped);
+
+	if (!fdm_active ||
+		(fw_dump.reserve_dump_area_start ==
+		fdm_ptr->cpu_state_data.destination_address))
+		return 0;
+
+	/* Dump is active. Show reserved memory region. */
+	seq_printf(m,
+			"    : [%#016llx-%#016llx] %#llx bytes, "
+			"Dumped: %#llx\n",
+			(unsigned long long)fw_dump.reserve_dump_area_start,
+			fdm_ptr->cpu_state_data.destination_address - 1,
+			fdm_ptr->cpu_state_data.destination_address -
+			fw_dump.reserve_dump_area_start,
+			fdm_ptr->cpu_state_data.destination_address -
+			fw_dump.reserve_dump_area_start);
+	return 0;
+}
+
+static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
+						0444, fadump_enabled_show,
+						NULL);
+static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
+						0644, fadump_register_show,
+						fadump_register_store);
+
+static int fadump_region_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, fadump_region_show, inode->i_private);
+}
+
+static const struct file_operations fadump_region_fops = {
+	.open    = fadump_region_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+static void fadump_init_files(void)
+{
+	struct dentry *debugfs_file;
+	int rc = 0;
+
+	rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
+	if (rc)
+		printk(KERN_ERR "fadump: unable to create sysfs file"
+			" fadump_enabled (%d)\n", rc);
+
+	rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
+	if (rc)
+		printk(KERN_ERR "fadump: unable to create sysfs file"
+			" fadump_registered (%d)\n", rc);
+
+	debugfs_file = debugfs_create_file("fadump_region", 0444,
+					powerpc_debugfs_root, NULL,
+					&fadump_region_fops);
+	if (!debugfs_file)
+		printk(KERN_ERR "fadump: unable to create debugfs file"
+				" fadump_region\n");
+	return;
+}
+
+/*
+ * Prepare for firmware-assisted dump.
+ */
+int __init setup_fadump(void)
+{
+	if (!fw_dump.fadump_supported) {
+		printk(KERN_ERR "Firmware-assisted dump is not supported on"
+			" this hardware\n");
+		return 0;
+	}
+
+	fadump_show_config();
+	/* Initialize the kernel dump memory structure for FAD registration. */
+	if (fw_dump.reserve_dump_area_size)
+		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+	fadump_init_files();
+
+	return 1;
+}
+subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 961bb03..2549b53 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -39,6 +39,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
 #include <asm/kdump.h>
+#include <asm/fadump.h>
 
 #define DBG(...)
 
@@ -445,7 +446,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 
 static void iommu_table_clear(struct iommu_table *tbl)
 {
-	if (!is_kdump_kernel()) {
+	/*
+	 * In case of firmware-assisted dump, the system goes through a clean
+	 * reboot process at the time of system crash. Hence it's safe to
+	 * clear the TCE entries if firmware-assisted dump is active.
+	 */
+	if (!is_kdump_kernel() || is_fadump_active()) {
 		/* Clear the table in case firmware left allocations in it */
 		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
 		return;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 26b2872..ba64f1a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -54,6 +54,7 @@
 #include <asm/spu.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
+#include <asm/fadump.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -627,6 +628,16 @@ static void __init htab_initialize(void)
 		/* Using a hypervisor which owns the htab */
 		htab_address = NULL;
 		_SDR1 = 0; 
+#ifdef CONFIG_FA_DUMP
+		/*
+		 * If firmware-assisted dump is active, firmware preserves
+		 * the contents of htab along with entire partition memory.
+		 * Clear the htab if firmware-assisted dump is active so
+		 * that we don't end up using old mappings.
+		 */
+		if (is_fadump_active() && ppc_md.hpte_clear_all)
+			ppc_md.hpte_clear_all();
+#endif
 	} else {
 		/* Find storage for the HPT.  Must be contiguous in
 		 * the absolute address space. On cell we want it to be

^ permalink raw reply related

* [RFC PATCH v3 02/10] fadump: Reserve the memory for firmware assisted dump.
From: Mahesh J Salgaonkar @ 2011-10-31 17:06 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Reserve the memory during early boot to preserve CPU state data, HPTE region
and RMR region data in case of kernel crash. At the time of crash, powerpc
firmware will store CPU state data, HPTE region data and move RMR region
data to the reserved memory area.

If the firmware-assisted dump fails to reserve the memory, then fall back
to the existing kexec-based kdump.

Most of the code to reserve memory has been adapted from the
phyp-assisted dump implementation written by Linas Vepstas and
Manish Ahuja.

Change in v2:
- Modified to use standard pr_debug() macro.
- Modified early_init_dt_scan_fw_dump() to get the size of
  "ibm,configure-kernel-dump-sizes" property and use it to iterate through
  an array of dump sections.
- Introduced boot option 'fadump_reserve_mem=' to let user specify the
  fadump boot memory to be reserved.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |   65 ++++++++++
 arch/powerpc/kernel/Makefile      |    1 
 arch/powerpc/kernel/fadump.c      |  250 +++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/prom.c        |   15 ++
 4 files changed, 330 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/include/asm/fadump.h
 create mode 100644 arch/powerpc/kernel/fadump.c

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
new file mode 100644
index 0000000..0b040c1
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump.h
@@ -0,0 +1,65 @@
+/*
+ * Firmware Assisted dump header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __PPC64_FA_DUMP_H__
+#define __PPC64_FA_DUMP_H__
+
+#ifdef CONFIG_FA_DUMP
+
+/*
+ * The RMR region will be saved for later dumping when the kernel crashes.
+ * It spans RMR_START to RMR_END, i.e. the first ppc64_rma_size bytes.
+ */
+#define RMR_START	0x0
+#define RMR_END		(ppc64_rma_size)
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully.
+ */
+#define MIN_BOOT_MEM	((RMR_END < (0x1UL << 28)) ? (0x1UL << 28) : RMR_END)
+
+/* Firmware provided dump sections */
+#define FADUMP_CPU_STATE_DATA	0x0001
+#define FADUMP_HPTE_REGION	0x0002
+#define FADUMP_REAL_MODE_REGION	0x0011
+
+struct fw_dump {
+	unsigned long	cpu_state_data_size;
+	unsigned long	hpte_region_size;
+	unsigned long	boot_memory_size;
+	unsigned long	reserve_dump_area_start;
+	unsigned long	reserve_dump_area_size;
+	/* cmd line option during boot */
+	unsigned long	reserve_bootvar;
+
+	int		ibm_configure_kernel_dump;
+
+	unsigned long	fadump_enabled:1;
+	unsigned long	fadump_supported:1;
+	unsigned long	dump_active:1;
+};
+
+extern int early_init_dt_scan_fw_dump(unsigned long node,
+		const char *uname, int depth, void *data);
+extern int fadump_reserve_mem(void);
+#endif
+#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ce4f7f1..59b549c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO)		+= vio.o
 obj-$(CONFIG_IBMEBUS)           += ibmebus.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_FA_DUMP)		+= fadump.o
 ifeq ($(CONFIG_PPC32),y)
 obj-$(CONFIG_E500)		+= idle_e500.o
 endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 0000000..05dffc0
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,250 @@
+/*
+ * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
+ * dump with assistance from firmware. This approach does not use kexec,
+ * instead firmware assists in booting the kdump kernel while preserving
+ * memory contents. Most of the implementation has been adapted from the
+ * phyp-assisted dump implementation written by Linas Vepstas and
+ * Manish Ahuja.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+
+/*
+ * The RTAS property "ibm,configure-kernel-dump-sizes" returns dump
+ * sizes for the firmware provided dump sections (cpu state data
+ * and hpte region).
+ */
+struct dump_section {
+	u32		dump_section;
+	unsigned long	section_size;
+} __packed;
+
+static struct fw_dump fw_dump;
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node,
+			const char *uname, int depth, void *data)
+{
+	const struct dump_section *sections;
+	int i, num_sections;
+	unsigned long size;
+	const int *token;
+
+	if (depth != 1 || strcmp(uname, "rtas") != 0)
+		return 0;
+
+	/*
+	 * Check if Firmware Assisted dump is supported. If yes, check
+	 * if dump has been initiated on last reboot.
+	 */
+	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+	if (!token)
+		return 0;
+
+	fw_dump.fadump_supported = 1;
+	fw_dump.ibm_configure_kernel_dump = *token;
+
+	/*
+	 * The 'ibm,kernel-dump' rtas node is present only if there is
+	 * dump data waiting for us.
+	 */
+	if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL))
+		fw_dump.dump_active = 1;
+
+	/* Get the sizes required to store dump data for the firmware provided
+	 * dump sections.
+	 */
+	sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+					&size);
+
+	if (!sections)
+		return 0;
+
+	num_sections = size / sizeof(struct dump_section);
+
+	for (i = 0; i < num_sections; i++) {
+		switch (sections[i].dump_section) {
+		case FADUMP_CPU_STATE_DATA:
+			fw_dump.cpu_state_data_size = sections[i].section_size;
+			break;
+		case FADUMP_HPTE_REGION:
+			fw_dump.hpte_region_size = sections[i].section_size;
+			break;
+		}
+	}
+	return 1;
+}
+
+/**
+ * calculate_reserve_size() - reserve variable boot area 5% of System RAM
+ *
+ * Function to find the largest memory size we need to reserve during early
+ * boot process. This will be the size of the memory that is required for a
+ * kernel to boot successfully.
+ *
+ * This function has been taken from phyp-assisted dump feature implementation.
+ *
+ * returns larger of 256MB or 5% rounded down to multiples of 256MB.
+ *
+ * TODO: Come up with better approach to find out more accurate memory size
+ * that is required for a kernel to boot successfully.
+ *
+ */
+static inline unsigned long calculate_reserve_size(void)
+{
+	unsigned long size;
+
+	/*
+	 * Check if the size is specified through fadump_reserve_mem= cmdline
+	 * option. If yes, then use that.
+	 */
+	if (fw_dump.reserve_bootvar)
+		return fw_dump.reserve_bootvar;
+
+	/* divide by 20 to get 5% of value */
+	size = memblock_end_of_DRAM();
+	do_div(size, 20);
+
+	/* round it down to a multiple of 256MB */
+	size = size & ~0x0FFFFFFFUL;
+
+	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
+	if (memory_limit && size > memory_limit)
+		size = memory_limit;
+
+	return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+}
+
+/*
+ * Calculate the total memory size required to be reserved for
+ * firmware-assisted dump registration.
+ */
+static unsigned long get_dump_area_size(void)
+{
+	unsigned long size = 0;
+
+	size += fw_dump.cpu_state_data_size;
+	size += fw_dump.hpte_region_size;
+	size += fw_dump.boot_memory_size;
+
+	size = PAGE_ALIGN(size);
+	return size;
+}
+
+int __init fadump_reserve_mem(void)
+{
+	unsigned long base, size, memory_boundary;
+
+	if (!fw_dump.fadump_enabled)
+		return 0;
+
+	if (!fw_dump.fadump_supported) {
+		printk(KERN_ERR "Firmware-assisted dump is not supported on"
+				" this hardware\n");
+		fw_dump.fadump_enabled = 0;
+		return 0;
+	}
+	/* Initialize boot memory size */
+	fw_dump.boot_memory_size = calculate_reserve_size();
+
+	/*
+	 * Calculate the memory boundary.
+	 * If memory_limit is less than actual memory boundary then reserve
+	 * the memory for fadump beyond the memory_limit and adjust the
+	 * memory_limit accordingly, so that the running kernel can run with
+	 * specified memory_limit.
+	 */
+	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
+		size = get_dump_area_size();
+		if ((memory_limit + size) < memblock_end_of_DRAM())
+			memory_limit += size;
+		else
+			memory_limit = memblock_end_of_DRAM();
+		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
+				" dump, now %#016llx\n",
+				(unsigned long long)memory_limit);
+	}
+	if (memory_limit)
+		memory_boundary = memory_limit;
+	else
+		memory_boundary = memblock_end_of_DRAM();
+
+	if (fw_dump.dump_active) {
+		printk(KERN_INFO "Firmware-assisted dump is active.\n");
+		/*
+		 * If last boot has crashed then reserve all the memory
+		 * above boot_memory_size so that we don't touch it until
+		 * dump is written to disk by userspace tool. This memory
+		 * will be released for general use once the dump is saved.
+		 */
+		base = fw_dump.boot_memory_size;
+		size = memory_boundary - base;
+		memblock_reserve(base, size);
+		printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+				"for saving crash dump\n",
+				(unsigned long)(size >> 20),
+				(unsigned long)(base >> 20));
+	} else {
+		/* Reserve the memory at the top of memory. */
+		size = get_dump_area_size();
+		base = memory_boundary - size;
+		memblock_reserve(base, size);
+		printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+				"for firmware-assisted dump\n",
+				(unsigned long)(size >> 20),
+				(unsigned long)(base >> 20));
+	}
+	fw_dump.reserve_dump_area_start = base;
+	fw_dump.reserve_dump_area_size = size;
+	return 1;
+}
+
+/* Look for fadump= cmdline option. */
+static int __init early_fadump_param(char *p)
+{
+	if (!p)
+		return 1;
+
+	if (p[0] == '1')
+		fw_dump.fadump_enabled = 1;
+	else if (p[0] == '0')
+		fw_dump.fadump_enabled = 0;
+
+	return 0;
+}
+early_param("fadump", early_fadump_param);
+
+/* Look for fadump_reserve_mem= cmdline option */
+static int __init early_fadump_reserve_mem(char *p)
+{
+	if (p)
+		fw_dump.reserve_bootvar = memparse(p, &p);
+	return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 174e1e9..3fe75eb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -54,6 +54,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/phyp_dump.h>
 #include <asm/kexec.h>
+#include <asm/fadump.h>
 #include <mm/mmu_decl.h>
 
 #ifdef DEBUG
@@ -712,6 +713,11 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL);
 #endif
 
+#ifdef CONFIG_FA_DUMP
+	/* scan tree to see if dump is active during last boot */
+	of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
+#endif
+
 	/* Retrieve various informations from the /chosen node of the
 	 * device-tree, including the platform type, initrd location and
 	 * size, TCE reserve, and more ...
@@ -735,7 +741,14 @@ void __init early_init_devtree(void *params)
 	if (PHYSICAL_START > MEMORY_START)
 		memblock_reserve(MEMORY_START, 0x8000);
 	reserve_kdump_trampoline();
-	reserve_crashkernel();
+#ifdef CONFIG_FA_DUMP
+	/*
+	 * If we fail to reserve memory for firmware-assisted dump then
+	 * fall back to kexec-based kdump.
+	 */
+	if (fadump_reserve_mem() == 0)
+#endif
+		reserve_crashkernel();
 	early_reserve_mem();
 	phyp_dump_reserve_mem();
 

^ permalink raw reply related

* [RFC PATCH v3 01/10] fadump: Add documentation for firmware-assisted dump.
From: Mahesh J Salgaonkar @ 2011-10-31 17:05 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang
In-Reply-To: <20111031170200.12259.27663.stgit@mars.in.ibm.com>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Documentation for firmware-assisted dump. This document is based on the
original documentation written for phyp-assisted dump by Linas Vepstas
and Manish Ahuja, with a few changes to reflect the current implementation.

Change in v3:
- Modified the documentation to reflect the introduction of the
  fadump_registered sysfs file, plus a few minor changes.

Change in v2:
- Modified the documentation to reflect the change of fadump_region
  file under debugfs filesystem.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 Documentation/powerpc/firmware-assisted-dump.txt |  262 ++++++++++++++++++++++
 1 files changed, 262 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/powerpc/firmware-assisted-dump.txt

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
new file mode 100644
index 0000000..ba6724a
--- /dev/null
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -0,0 +1,262 @@
+
+                   Firmware-Assisted Dump
+                   ------------------------
+                       July 2011
+
+The goal of firmware-assisted dump is to enable the dump of
+a crashed system, and to do so from a fully-reset system, and
+to minimize the total elapsed time until the system is back
+in production use.
+
+As compared to kdump or other strategies, firmware-assisted
+dump offers several strong, practical advantages:
+
+-- Unlike kdump, the system has been reset, and loaded
+   with a fresh copy of the kernel.  In particular,
+   PCI and I/O devices have been reinitialized and are
+   in a clean, consistent state.
+-- Once the dump is copied out, the memory that held the dump
+   is immediately available to the running kernel. A further
+   reboot isn't required.
+
+The above can only be accomplished by coordination with,
+and assistance from the Power firmware. The procedure is
+as follows:
+
+-- The first kernel registers the sections of memory with the
+   Power firmware for dump preservation during OS initialization.
+   These registered sections of memory are reserved by the first
+   kernel during early boot.
+
+-- When a system crashes, the Power firmware will save
+   the low memory (boot memory, sized as the larger of 5% of system
+   RAM or 256MB) to a previously registered save region. It
+   will also save system registers, and hardware PTE's.
+
+   NOTE: The term 'boot memory' means size of the low memory chunk
+         that is required for a kernel to boot successfully when
+         booted with restricted memory. By default, the boot memory
+         size will be calculated as the larger of 5% of system RAM or
+         256MB. Alternatively, user can also specify boot memory
+         size through boot parameter 'fadump_reserve_mem=' which
+         will override the default calculated size.
+
+-- After the low memory (boot memory) area has been saved, the
+   firmware will reset PCI and other hardware state.  It will
+   *not* clear the RAM. It will then launch the bootloader, as
+   normal.
+
+-- The freshly booted kernel will notice that there is a new
+   node (ibm,dump-kernel) in the device tree, indicating that
+   there is crash data available from a previous boot. During
+   the early boot OS will reserve rest of the memory above
+   boot memory size effectively booting with restricted memory
+   size. This will make sure that the second kernel will not
+   touch any of the dump memory area.
+
+-- Userspace tools will read /proc/vmcore to obtain the contents
+   of memory, which holds the previous crashed kernel dump in ELF
+   format. The userspace tools may copy this info to disk, or
+   network, nas, san, iscsi, etc. as desired.
+
+-- Once the userspace tool is done saving dump, it will echo
+   '1' to /sys/kernel/fadump_release_mem to release the reserved
+   memory back to general use, except the memory required for
+   next firmware-assisted dump registration.
+
+   e.g.
+     # echo 1 > /sys/kernel/fadump_release_mem
+
+Please note that the firmware-assisted dump feature
+is only available on Power6 and above systems with recent
+firmware versions.
+
+Implementation details:
+----------------------
+
+During boot, a check is made to see if firmware supports
+this feature on that particular machine. If it does, then
+we check to see if an active dump is waiting for us. If yes
+then everything but boot memory size of RAM is reserved during
+early boot (See Fig. 2). This area is released once we collect a
+dump from user land scripts (kdump scripts) that are run. If
+there is dump data, then the /sys/kernel/fadump_release_mem
+file is created, and the reserved memory is held.
+
+If there is no waiting dump data, then only the memory required
+to hold CPU state, HPTE region, boot memory dump and elfcore
+header, is reserved at the top of memory (see Fig. 1). This area
+is *not* released: this region will be kept permanently reserved,
+so that it can act as a receptacle for a copy of the boot memory
+content in addition to CPU state and HPTE region, in the case a
+crash does occur.
+
+  o Memory Reservation during first kernel
+
+  Low memory                                        Top of memory
+  0      boot memory size                                       |
+  |           |                       |<--Reserved dump area -->|
+  V           V                       |   Permanent Reservation V
+  +-----------+----------/ /----------+---+----+-----------+----+
+  |           |                       |CPU|HPTE|  DUMP     |ELF |
+  +-----------+----------/ /----------+---+----+-----------+----+
+        |                                           ^
+        |                                           |
+        \                                           /
+         -------------------------------------------
+          Boot memory content gets transferred to
+          reserved area by firmware at the time of
+          crash
+                   Fig. 1
+
+  o Memory Reservation during second kernel after crash
+
+  Low memory                                        Top of memory
+  0      boot memory size                                       |
+  |           |<------------- Reserved dump area ----------- -->|
+  V           V                                                 V
+  +-----------+----------/ /----------+---+----+-----------+----+
+  |           |                       |CPU|HPTE|  DUMP     |ELF |
+  +-----------+----------/ /----------+---+----+-----------+----+
+        |                                                    |
+        V                                                    V
+   Used by second                                    /proc/vmcore
+   kernel to boot
+                   Fig. 2
+
+Currently the dump will be copied from /proc/vmcore to a
+new file upon user intervention. The dump data available through
+/proc/vmcore will be in ELF format. Hence the existing kdump
+infrastructure (kdump scripts) to save the dump works fine
+with minor modifications. The kdump script requires following
+modifications:
+-- During service kdump start if /proc/vmcore entry is not present,
+   look for the existence of /sys/kernel/fadump_enabled and read
+   value exported by it. If value is set to '0' then fallback to
+   existing kexec based kdump. If value is set to '1' then check the
+   value exported by /sys/kernel/fadump_registered. If value is set
+   to '1' then print success otherwise register for fadump by
+   echo'ing 1 > /sys/kernel/fadump_registered file.
+
+-- During service kdump start if /proc/vmcore entry is present,
+   execute the existing routine to save the dump. Once the dump
+   is saved, echo 1 > /sys/kernel/fadump_release_mem (if the
+   file exists) to release the reserved memory for general use
+   and continue without rebooting. At this point the memory
+   reservation map will look as shown in Fig. 1. If the file
+   /sys/kernel/fadump_release_mem is not present then follow
+   the existing routine to reboot into new kernel.
+
+-- During service kdump stop echo 0 > /sys/kernel/fadump_registered
+   to un-register the fadump.
+
+The tools to examine the dump will be same as the ones
+used for kdump.
+
+How to enable firmware-assisted dump (fadump):
+-------------------------------------
+
+1. Set config option CONFIG_FA_DUMP=y and build kernel.
+2. Boot into linux kernel with 'fadump=1' kernel cmdline option.
+3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline
+   to specify size of the memory to reserve for boot memory dump
+   preservation.
+
+NOTE: If firmware-assisted dump fails to reserve memory then it will
+   fallback to existing kdump mechanism if 'crashkernel=' option
+   is set at kernel cmdline.
+
+Sysfs/debugfs files:
+------------
+
+Firmware-assisted dump feature uses sysfs file system to hold
+the control files and debugfs file to display memory reserved region.
+
+Here is the list of files under kernel sysfs:
+
+ /sys/kernel/fadump_enabled
+
+    This is used to display the fadump status.
+    0 = fadump is disabled
+    1 = fadump is enabled
+
+ /sys/kernel/fadump_registered
+
+    This is used to display the fadump registration status as well
+    as to control (start/stop) the fadump registration.
+    0 = fadump is not registered.
+    1 = fadump is registered and ready to handle system crash.
+
+    To register fadump echo 1 > /sys/kernel/fadump_registered and
+    echo 0 > /sys/kernel/fadump_registered to un-register and stop the
+    fadump. Once the fadump is un-registered, the system crash will not
+    be handled and vmcore will not be captured.
+
+ /sys/kernel/fadump_release_mem
+
+    This file is available only when fadump is active during
+    second kernel. This is used to release the reserved memory
+    regions that are held for saving crash dump. To release the
+    reserved memory echo 1 to it:
+
+    echo 1  > /sys/kernel/fadump_release_mem
+
+    After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
+    file will change to reflect the new memory reservations.
+
+Here is the list of files under powerpc debugfs:
+(Assuming debugfs is mounted on /sys/kernel/debug directory.)
+
+ /sys/kernel/debug/powerpc/fadump_region
+
+    This file shows the reserved memory regions if fadump is
+    enabled otherwise this file is empty. The output format
+    is:
+    <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+
+    e.g.
+    Contents when fadump is registered during first kernel
+
+    # cat /sys/kernel/debug/powerpc/fadump_region
+    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+
+    Contents when fadump is active during second kernel
+
+    # cat /sys/kernel/debug/powerpc/fadump_region
+    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+        : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+
+NOTE: Please refer to debugfs documentation on how to mount the debugfs
+      filesystem.
+
+
+TODO:
+-----
+ o Need to come up with the better approach to find out more
+   accurate boot memory size that is required for a kernel to
+   boot successfully when booted with restricted memory.
+ o The fadump implementation introduces a fadump crash info structure
+   in the scratch area before the ELF core header. The idea of introducing
+   this structure is to pass some important crash info data to the second
+   kernel which will help second kernel to populate ELF core header with
+   correct data before it gets exported through /proc/vmcore. The current
+   design implementation does not address a possibility of introducing
+   additional fields (in future) to this structure without affecting
+   compatibility. Need to come up with the better approach to address this.
+   The possible approaches are:
+	1. Introduce version field for version tracking, bump up the version
+	whenever a new field is added to the structure in future. The version
+	field can be used to find out what fields are valid for the current
+	version of the structure.
+	2. Reserve the area of predefined size (say PAGE_SIZE) for this
+	structure and have unused area as reserved (initialized to zero)
+	for future field additions.
+   The advantage of approach 1 over 2 is we don't need to reserve extra space.
+---
+Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+This document is based on the original documentation written for phyp
+assisted dump by Linas Vepstas and Manish Ahuja.

^ permalink raw reply related

* [RFC PATCH v3 00/10] fadump: Firmware-assisted dump support for Powerpc.
From: Mahesh J Salgaonkar @ 2011-10-31 17:04 UTC (permalink / raw)
  To: linuxppc-dev, Linux Kernel, Benjamin Herrenschmidt
  Cc: Anton Blanchard, Eric W. Biederman, Milton Miller, Amerigo Wang

Hi All,

Please find the version 3 of the patchset that implements firmware-assisted
dump mechanism to capture kernel crash dump for Powerpc architecture. The
firmware-assisted dump is a robust mechanism to get reliable kernel crash
dump with assistance from firmware. This approach does not use kexec, instead
firmware assists in booting the kdump kernel while preserving memory contents.

Changes in v3:
-------------
- Re-factored the implementation to work with kdump service start/stop.
  Introduced fadump_registered sysfs control file which will be used by
  kdump init scripts to start/stop firmware assisted dump. echo 1 to
  /sys/kernel/fadump_registered file for fadump registration and
  echo 0 to /sys/kernel/fadump_registered file for fadump un-registration.
- Introduced the locking mechanism to handle simultaneous writes to
  sysfs control files fadump_registered and fadump_release_mem

  Affected patches are: 01/10, 03/10, 08/10.

Changes in v2:
-------------
patch 01/10:
- Modified the documentation to reflect the change of fadump_region
  file under debugfs filesystem.

patch 02/10:
- Modified to use standard pr_debug() macro.
- Modified early_init_dt_scan_fw_dump() to get the size of
  "ibm,configure-kernel-dump-sizes" property and use it to iterate through
  an array of dump sections.
- Introduced boot option 'fadump_reserve_mem=' to let user specify the
  fadump boot memory to be reserved.

patch 03/10:
- Removed few debug print statements.
- Moved the setup_fadump() call from setup_system() and now calling it
  subsys_initcall.
- Moved fadump_region attribute under debugfs.
- Clear the TCE entries if firmware assisted dump is active.

patch 05/10:
- Moved the crash_fadump() invocation from generic code to panic notifier.
- Introduced cpu_notes_buf_alloc() function to allocate cpu notes buffer
  using get_free_pages().

patch 08/10:
- Introduced cpu_notes_buf_free() function to free memory allocated for
  cpu notes buffer.

Most of the code implementation has been adapted from phyp assisted dump
implementation written by Linas Vepstas and Manish Ahuja.

The first patch is a documentation that talks about firmware-assisted dump
mechanism, implementation details and TODO list.

I have tested the patches on following system configuration:
1. LPAR on Power6 with 4GB RAM and 8 CPUs
2. LPAR on Power7 with 2GB RAM and 20 CPUs
3. LPAR on Power7 with 1TB RAM and 896 CPUs

These patches cleanly apply on commit c3b92c878 in linux-2.6 git tree.

Please review the patchset and let me know your comments.

Thanks,
-Mahesh.

---

Mahesh Salgaonkar (10):
      fadump: Add documentation for firmware-assisted dump.
      fadump: Reserve the memory for firmware assisted dump.
      fadump: Register for firmware assisted dump.
      fadump: Initialize elfcore header and add PT_LOAD program headers.
      fadump: Convert firmware-assisted cpu state dump data into elf notes.
      fadump: Add PT_NOTE program header for vmcoreinfo
      fadump: Introduce cleanup routine to invalidate /proc/vmcore.
      fadump: Invalidate registration and release reserved memory for general use.
      fadump: Invalidate the fadump registration during machine shutdown.
      fadump: Introduce config option for firmware assisted dump feature


 Documentation/powerpc/firmware-assisted-dump.txt |  262 ++++
 arch/powerpc/Kconfig                             |   13 
 arch/powerpc/include/asm/fadump.h                |  205 ++++
 arch/powerpc/kernel/Makefile                     |    1 
 arch/powerpc/kernel/fadump.c                     | 1284 ++++++++++++++++++++++
 arch/powerpc/kernel/iommu.c                      |    8 
 arch/powerpc/kernel/prom.c                       |   15 
 arch/powerpc/kernel/setup-common.c               |   16 
 arch/powerpc/kernel/traps.c                      |    5 
 arch/powerpc/mm/hash_utils_64.c                  |   11 
 fs/proc/vmcore.c                                 |   23 
 include/linux/crash_dump.h                       |    1 
 include/linux/memblock.h                         |    1 
 kernel/crash_dump.c                              |   33 +
 14 files changed, 1876 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/powerpc/firmware-assisted-dump.txt
 create mode 100644 arch/powerpc/include/asm/fadump.h
 create mode 100644 arch/powerpc/kernel/fadump.c

-- 
Signature

^ permalink raw reply

* Re: [4/4] powerpc/booke: Re-organize debug code
From: Kumar Gala @ 2011-10-31 14:21 UTC (permalink / raw)
  To: Jimi Xenidis; +Cc: linuxppc-dev
In-Reply-To: <5EF5AFC4-C852-4D1C-9019-D1CBAE1157EA@pobox.com>


On Oct 28, 2011, at 2:37 PM, Jimi Xenidis wrote:

>=20
> On Oct 5, 2011, at 9:53 PM, Kumar Gala wrote:
>=20
>> * set_dabr/do_dabr are no longer used when CONFIG_PPC_ADV_DEBUG_REGS is set
>> refactor code a bit such that we only build the dabr code for
>> !CONFIG_PPC_ADV_DEBUG_REGS and removed some CONFIG_PPC_ADV_DEBUG_REGS
>> code in set_dabr that would never get built.
>>=20
>> * Move do_send_trap into traps.c as its only used there
>>=20
>> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
>>=20
>> ---
>> arch/powerpc/include/asm/system.h |    5 +--
>> arch/powerpc/kernel/process.c     |   97 =
+++++++++++++-----------------------
>> arch/powerpc/kernel/traps.c       |   17 +++++++
>> 3 files changed, 53 insertions(+), 66 deletions(-)
>>=20
>> diff --git a/arch/powerpc/include/asm/system.h =
b/arch/powerpc/include/asm/system.h
>> index e30a13d..1dc5d9c 100644
>> --- a/arch/powerpc/include/asm/system.h
>> +++ b/arch/powerpc/include/asm/system.h
>> @@ -111,11 +111,8 @@ static inline int debugger_dabr_match(struct =
pt_regs *regs) { return 0; }
>> static inline int debugger_fault_handler(struct pt_regs *regs) { =
return 0; }
>> #endif
>>=20
>> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
>> extern int set_dabr(unsigned long dabr);
>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> -extern void do_send_trap(struct pt_regs *regs, unsigned long =
address,
>> -			 unsigned long error_code, int signal_code, int =
brkpt);
>> -#else
>=20
>=20
> This part of the patch breaks xmon.c
> Naively I simply wrapped the xmon call:
>=20
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index f08836a..b5911b2 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -738,8 +738,10 @@ static void insert_bpts(void)
>=20
> static void insert_cpu_bpts(void)
> {
> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
> 	if (dabr.enabled)
> 		set_dabr(dabr.address | (dabr.enabled & 7));
> +#endif
> 	if (iabr && cpu_has_feature(CPU_FTR_IABR))
> 		mtspr(SPRN_IABR, iabr->address
> 			 | (iabr->enabled & (BP_IABR|BP_IABR_TE)));
> @@ -767,7 +769,9 @@ static void remove_bpts(void)
>=20
> static void remove_cpu_bpts(void)
> {
> +#ifndef CONFIG_PPC_ADV_DEBUG_REGS
> 	set_dabr(0);
> +#endif
> 	if (cpu_has_feature(CPU_FTR_IABR))
> 		mtspr(SPRN_IABR, 0);
> }

Shouldn't all of these functions be #ifndef'd out as we don't support
cpu_bpts on book-e parts in xmon code today?

>=20
> -JX
>=20
>=20
>> extern void do_dabr(struct pt_regs *regs, unsigned long address,
>> 		    unsigned long error_code);
>> #endif
>> diff --git a/arch/powerpc/kernel/process.c =
b/arch/powerpc/kernel/process.c
>> index 269a309..989e574 100644
>> --- a/arch/powerpc/kernel/process.c
>> +++ b/arch/powerpc/kernel/process.c
>> @@ -251,50 +251,6 @@ void discard_lazy_cpu_state(void)
>> #endif /* CONFIG_SMP */
>>=20
>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> -void do_send_trap(struct pt_regs *regs, unsigned long address,
>> -		  unsigned long error_code, int signal_code, int =
breakpt)
>> -{
>> -	siginfo_t info;
>> -
>> -	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>> -			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>> -		return;
>> -
>> -	/* Deliver the signal to userspace */
>> -	info.si_signo =3D SIGTRAP;
>> -	info.si_errno =3D breakpt;	/* breakpoint or watchpoint id =
*/
>> -	info.si_code =3D signal_code;
>> -	info.si_addr =3D (void __user *)address;
>> -	force_sig_info(SIGTRAP, &info, current);
>> -}
>> -#else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
>> -void do_dabr(struct pt_regs *regs, unsigned long address,
>> -		    unsigned long error_code)
>> -{
>> -	siginfo_t info;
>> -
>> -	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>> -			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>> -		return;
>> -
>> -	if (debugger_dabr_match(regs))
>> -		return;
>> -
>> -	/* Clear the DABR */
>> -	set_dabr(0);
>> -
>> -	/* Deliver the signal to userspace */
>> -	info.si_signo =3D SIGTRAP;
>> -	info.si_errno =3D 0;
>> -	info.si_code =3D TRAP_HWBKPT;
>> -	info.si_addr =3D (void __user *)address;
>> -	force_sig_info(SIGTRAP, &info, current);
>> -}
>> -#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>> -
>> -static DEFINE_PER_CPU(unsigned long, current_dabr);
>> -
>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> /*
>> * Set the debug registers back to their default "safe" values.
>> */
>> @@ -357,16 +313,7 @@ static void switch_booke_debug_regs(struct =
thread_struct *new_thread)
>> 			prime_debug_regs(new_thread);
>> }
>> #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
>> -#ifndef CONFIG_HAVE_HW_BREAKPOINT
>> -static void set_debug_reg_defaults(struct thread_struct *thread)
>> -{
>> -	if (thread->dabr) {
>> -		thread->dabr =3D 0;
>> -		set_dabr(0);
>> -	}
>> -}
>> -#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
>> -#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>> +static DEFINE_PER_CPU(unsigned long, current_dabr);
>>=20
>> int set_dabr(unsigned long dabr)
>> {
>> @@ -376,19 +323,45 @@ int set_dabr(unsigned long dabr)
>> 		return ppc_md.set_dabr(dabr);
>>=20
>> 	/* XXX should we have a CPU_FTR_HAS_DABR ? */
>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> -	mtspr(SPRN_DAC1, dabr);
>> -#ifdef CONFIG_PPC_47x
>> -	isync();
>> -#endif
>> -#elif defined(CONFIG_PPC_BOOK3S)
>> 	mtspr(SPRN_DABR, dabr);
>> -#endif
>> -
>>=20
>> 	return 0;
>> }
>>=20
>> +void do_dabr(struct pt_regs *regs, unsigned long address,
>> +		    unsigned long error_code)
>> +{
>> +	siginfo_t info;
>> +
>> +	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>> +			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>> +		return;
>> +
>> +	if (debugger_dabr_match(regs))
>> +		return;
>> +
>> +	/* Clear the DABR */
>> +	set_dabr(0);
>> +
>> +	/* Deliver the signal to userspace */
>> +	info.si_signo =3D SIGTRAP;
>> +	info.si_errno =3D 0;
>> +	info.si_code =3D TRAP_HWBKPT;
>> +	info.si_addr =3D (void __user *)address;
>> +	force_sig_info(SIGTRAP, &info, current);
>> +}
>> +
>> +#ifndef CONFIG_HAVE_HW_BREAKPOINT
>> +static void set_debug_reg_defaults(struct thread_struct *thread)
>> +{
>> +	if (thread->dabr) {
>> +		thread->dabr =3D 0;
>> +		set_dabr(0);
>> +	}
>> +}
>> +#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
>> +#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
>> +
>> #ifdef CONFIG_PPC64
>> DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
>> #endif
>> diff --git a/arch/powerpc/kernel/traps.c =
b/arch/powerpc/kernel/traps.c
>> index db733d3..edc1108 100644
>> --- a/arch/powerpc/kernel/traps.c
>> +++ b/arch/powerpc/kernel/traps.c
>> @@ -1184,6 +1184,23 @@ void SoftwareEmulation(struct pt_regs *regs)
>> #endif /* CONFIG_8xx */
>>=20
>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> +static void do_send_trap(struct pt_regs *regs, unsigned long =
address,
>> +		  unsigned long error_code, int signal_code, int =
breakpt)
>> +{
>> +	siginfo_t info;
>> +
>> +	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
>> +			11, SIGSEGV) =3D=3D NOTIFY_STOP)
>> +		return;
>> +
>> +	/* Deliver the signal to userspace */
>> +	info.si_signo =3D SIGTRAP;
>> +	info.si_errno =3D breakpt;	/* breakpoint or watchpoint id =
*/
>> +	info.si_code =3D signal_code;
>> +	info.si_addr =3D (void __user *)address;
>> +	force_sig_info(SIGTRAP, &info, current);
>> +}
>> +
>> static void handle_debug(struct pt_regs *regs, unsigned long =
debug_status)
>> {
>> 	int changed =3D 0;

^ permalink raw reply

* Re: RFC: ESR_I/DLK processing
From: Kumar Gala @ 2011-10-31 14:19 UTC (permalink / raw)
  To: Jimi Xenidis; +Cc: linuxppc-dev
In-Reply-To: <00A1F69B-FCB7-4F45-8BE4-4C4B35D53D59@pobox.com>


On Oct 28, 2011, at 3:43 PM, Jimi Xenidis wrote:

> arch/powerpc/kernel/head_fsl_booke.S has the following code:
>> 	/* Data Storage Interrupt */
>> 	START_EXCEPTION(DataStorage)
>> 	NORMAL_EXCEPTION_PROLOG
>> 	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it, pass =
arg3 */
>> 	stw	r5,_ESR(r11)
>> 	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it, pass =
arg2 */
>> 	andis.	r10,r5,(ESR_ILK|ESR_DLK)@h
>> 	bne	1f
>> 	EXC_XFER_EE_LITE(0x0300, handle_page_fault)
>> 1:
>> 	addi	r3,r1,STACK_FRAME_OVERHEAD
>> 	EXC_XFER_EE_LITE(0x0300, CacheLockingException)
>=20
>=20
> I need something similar for A2 (and all book3e) and was wondering, =
why this isn't just:
>=20
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 88abe70..8451822 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -159,6 +159,14 @@ int __kprobes do_page_fault(struct pt_regs =
*regs, unsigned long address,
>> 	}
>> #endif
>>=20
>> +#ifdef CONFIG_PPC_BOOK3E
>> +	if (error_code & (ESR_DLK|ESR_ILK)) {
>> +		/* detect that this is a privileged op and SIGILL */
>> +		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
>> +		return 0;
>> +	}
>> +#endif
>> +
>> 	if (notify_page_fault(regs))
>> 		return 0;
>=20
> Its not like this need to be fast or anything.
> I'd be happy to submit a patch that adds to fault.c and removed the =
I/DLK processing from head_fsl_booke.S
>=20
> Thoughts?
> -jx

Probably because at one point in time DSI had a fast path handling for
us.  I've got no issues w/the proposed patch, just remember to mixup the
CONFIG_PPC_BOOK3E as we don't define that in 32-bit fsl-booke

- k

^ permalink raw reply

* Re: [1/4] powerpc: Revert show_regs() define for readability
From: Kumar Gala @ 2011-10-31 14:18 UTC (permalink / raw)
  To: Jimi Xenidis; +Cc: Linuxppc-dev list
In-Reply-To: <1ADB010A-A48C-4E71-91E0-94D60A3E676F@pobox.com>


On Oct 28, 2011, at 2:40 PM, Jimi Xenidis wrote:

> 
> On Oct 5, 2011, at 9:53 PM, Kumar Gala wrote:
> 
>> We had an existing ifdef for 4xx & BOOKE processors that got changed to
>> CONFIG_PPC_ADV_DEBUG_REGS.  The define has nothing to do with
>> CONFIG_PPC_ADV_DEBUG_REGS.  The define really should be:
>> 
>> #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>> 
>> and not
>> 
>> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> 
>> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
>> 
>> ---
>> arch/powerpc/kernel/process.c |    2 +-
>> 1 files changed, 1 insertions(+), 1 deletions(-)
>> 
>> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
>> index 8f53954..a1b5981 100644
>> --- a/arch/powerpc/kernel/process.c
>> +++ b/arch/powerpc/kernel/process.c
>> @@ -657,7 +657,7 @@ void show_regs(struct pt_regs * regs)
>> 	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
>> 		printk("CFAR: "REG"\n", regs->orig_gpr3);
>> 	if (trap == 0x300 || trap == 0x600)
>> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS
>> +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
>> 		printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
> 
> I'll be needing "|| defined(CONFIG_PPC_BOOK3E)" added to this please.
> -jx

Under what platform is CONFIG_PPC_BOOK3E set and CONFIG_BOOKE is not?

- k

^ permalink raw reply

* Re: [RFC][PATCH 1/2] uio: allow drivers to override the pgprot for mmap
From: Kumar Gala @ 2011-10-31 13:44 UTC (permalink / raw)
  To: Greg KH; +Cc: linuxppc-dev, Hans J. Koch, linux-kernel
In-Reply-To: <20111029063809.GC2280@kroah.com>


On Oct 29, 2011, at 1:38 AM, Greg KH wrote:

> On Fri, Oct 28, 2011 at 11:48:12PM +0200, Hans J. Koch wrote:
>> On Fri, Oct 28, 2011 at 10:50:29AM -0500, Kumar Gala wrote:
>>> For some devices, the default behavior of pgprot_noncached() is not
>>> appropriate for all of its mappable regions. This provides a means =
for
>>> the kernel side of the UIO driver to override the flags without =
having
>>> to implement its own full mmap callback.
>>=20
>> Thanks for also providing an example driver showing the use of this.
>> You should also post this driver in a mainline-ready version, I'm a =
bit
>> uncomfortable with adding a new function pointer without having any =
users.
>=20
> I'm more than "uncomfortable", I'll refuse to take any such patch =
unless
> there is a in-kernel user, otherwise it makes no sense to add the
> pointer at all.
>=20
> thanks,
>=20
> greg k-h

I'm in agreement with this view.  I wanted to post this to make sure the
direction we took was ok so when the upstream driver is posted this
patch / change isn't a concern.

- k

^ permalink raw reply

* Re: powerpc 476, Little-endian, pte fault
From: Benjamin Herrenschmidt @ 2011-10-31 11:23 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, Ian Munsie, Santosh Kumar, linux-kernel
In-Reply-To: <20440.1320054588@neuling.org>

On Mon, 2011-10-31 at 20:49 +1100, Michael Neuling wrote:
> > I have built a cross compiler for ppc440 in little endian mode and
> > using it to build the kernel.
> > 
> > Yes i am running Linux in Little-Endian. This is the first user space
> > process. I wrote the below program and running it as init from
> > /sbin/init. I have also set the permissions with chmod +s.
> > 
> > main()
> > {
> > 
> > while(1){
> > printf("hello world");
> > sleep(1);
> >  }
> > }
> 
> Does libc even support little endian on PPC?

Ian did a port a while back for uClibc, is that at least partially based
on it ?

> > I have attached the patch.
> 
> This is a pretty huge patch:
> 
>  115 files changed, 44479 insertions(+), 7398 deletions(-)
> 
> It seems to include a new platform as well as a bunch of unrelated junk.
>
> I suggest you need to break this down into something more digestible.
> Like remove all the junk in the patch.  Then add the support for the new
> platform (invader? platform).  Then start looking at little endian.
> Unless you do this, it's unlikely anyone here is going to be able to
> help.
>
> When you get to the little endian work, you might want to take a look at
> this patch series from Ian Munsie:
> 
> http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-October/086165.html

Right, the new patch should be if possible based on Ian's series or at
least a cleaned / rebased variant of it. Then split in bits so we can
review it properly.

Cheers,
Ben.

> Mikey
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* [PATCH 2/2] NAND Machine support for Integrated Flash Controller
From: b35362 @ 2011-10-31  9:38 UTC (permalink / raw)
  To: dwmw2, Artem.Bityutskiy
  Cc: r58472, linux-kernel, linux-mtd, scottwood, akpm, linuxppc-dev
In-Reply-To: <1320053901-23801-1-git-send-email-b35362@freescale.com>

From: Liu Shuo <b35362@freescale.com>

Integrated Flash Controller(IFC) can be used to hook NAND Flash
chips using NAND Flash Machine available on it.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Liu Shuo <b35362@freescale.com>
---
 drivers/mtd/nand/Kconfig        |   10 +
 drivers/mtd/nand/Makefile       |    1 +
 drivers/mtd/nand/fsl_ifc_nand.c | 1076 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 1087 insertions(+), 0 deletions(-)
 create mode 100644 drivers/mtd/nand/fsl_ifc_nand.c

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 4c34252..126d9cc 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -456,6 +456,16 @@ config MTD_NAND_FSL_ELBC
 	  Enabling this option will enable you to use this to control
 	  external NAND devices.
 
+config MTD_NAND_FSL_IFC
+	tristate "NAND support for Freescale IFC controller"
+	depends on MTD_NAND && FSL_SOC
+	select FSL_IFC
+	help
+	  Various Freescale chips e.g P1010, include a NAND Flash machine
+	  with built-in hardware ECC capabilities.
+	  Enabling this option will enable you to use this to control
+	  external NAND devices.
+
 config MTD_NAND_FSL_UPM
 	tristate "Support for NAND on Freescale UPM"
 	depends on PPC_83xx || PPC_85xx
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 5745d83..3094131 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_MTD_ALAUDA)		+= alauda.o
 obj-$(CONFIG_MTD_NAND_PASEMI)		+= pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
+obj-$(CONFIG_MTD_NAND_FSL_IFC)		+= fsl_ifc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)		+= fsl_upm.o
 obj-$(CONFIG_MTD_NAND_SH_FLCTL)		+= sh_flctl.o
 obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
new file mode 100644
index 0000000..2c9116c
--- /dev/null
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -0,0 +1,1076 @@
+/*
+ * Freescale Integrated Flash Controller NAND driver
+ *
+ * Copyright 2011 Freescale Semiconductor, Inc
+ *
+ * Author: Dipen Dudhat <Dipen.Dudhat@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand_ecc.h>
+#include <asm/fsl_ifc.h>
+
+#define ERR_BYTE		0xFF /* Value returned for read
+					bytes when read failed	*/
+#define IFC_TIMEOUT_MSECS	500  /* Maximum number of mSecs to wait
+					for IFC NAND Machine	*/
+
+struct fsl_ifc_ctrl;
+
+/*
+ * Per-chip-select NAND state.  One instance is allocated by the probe
+ * routine for the bank whose CSPR matches the device's address.
+ */
+struct fsl_ifc_mtd {
+	struct mtd_info mtd;		/* mtd.priv points at 'chip' below	*/
+	struct nand_chip chip;		/* chip.priv points back at this struct	*/
+	struct fsl_ifc_ctrl *ctrl;	/* IFC controller (registers, status)	*/
+
+	struct device *dev;		/* device used for log messages		*/
+	int bank;		/* Chip select bank number		*/
+	unsigned int bufnum_mask; /* bufnum = page & bufnum_mask */
+	u8 __iomem *vbase;      /* Chip select base virtual address	*/
+};
+
+/*
+ * NAND-machine state shared by all chip selects of the IFC.  The driver
+ * reaches it through the file-scope ifc_nand_ctrl pointer.
+ */
+struct fsl_ifc_nand_ctrl {
+	struct nand_hw_control controller;
+	struct fsl_ifc_mtd *chips[FSL_IFC_BANK_COUNT];	/* indexed by bank */
+
+	u8 __iomem *addr;	/* Address of assigned IFC buffer	*/
+	unsigned int page;	/* Last page written to / read from	*/
+	unsigned int read_bytes;/* Number of bytes read during command	*/
+	unsigned int column;	/* Saved column from SEQIN		*/
+	unsigned int index;	/* Offset in 'addr' of next byte to read/write */
+	unsigned int oob;	/* Non zero if operating on OOB data	*/
+	unsigned int eccread;	/* Non zero for a full-page ECC read	*/
+	unsigned int counter;	/* Decremented in remove; struct freed at 0 */
+};
+
+static struct fsl_ifc_nand_ctrl *ifc_nand_ctrl;
+
+/*
+ * 512-byte page with 4-bit ECC, 8-bit bus.
+ * ECC lives in OOB bytes 8..15; bytes 0..4 and 6..7 are free.  Byte 5 is
+ * left out of the free list (presumably the small-page bad block marker --
+ * TODO confirm).
+ */
+static struct nand_ecclayout oob_512_8bit_ecc4 = {
+	.eccbytes = 8,
+	.eccpos = {8, 9, 10, 11, 12, 13, 14, 15},
+	.oobfree = { {0, 5}, {6, 2} },
+};
+
+/*
+ * 512-byte page with 4-bit ECC, 16-bit bus.
+ * ECC lives in OOB bytes 8..15; bytes 2..7 are free.
+ */
+static struct nand_ecclayout oob_512_16bit_ecc4 = {
+	.eccbytes = 8,
+	.eccpos = {8, 9, 10, 11, 12, 13, 14, 15},
+	.oobfree = { {2, 6}, },
+};
+
+/*
+ * 2048-byte page size with 4-bit ECC.
+ * ECC lives in OOB bytes 8..39 (8 bytes for each of the four 512-byte
+ * sectors); bytes 2..7 and 40..63 are free.
+ */
+static struct nand_ecclayout oob_2048_ecc4 = {
+	.eccbytes = 32,
+	.eccpos = {
+		8, 9, 10, 11, 12, 13, 14, 15,
+		16, 17, 18, 19, 20, 21, 22, 23,
+		24, 25, 26, 27, 28, 29, 30, 31,
+		32, 33, 34, 35, 36, 37, 38, 39,
+	},
+	.oobfree = { {2, 6}, {40, 24} },
+};
+
+/*
+ * 4096-byte page size with 4-bit ECC.
+ * ECC lives in OOB bytes 8..71 (8 bytes for each of the eight 512-byte
+ * sectors); bytes 2..7 and 72..127 are free.
+ */
+static struct nand_ecclayout oob_4096_ecc4 = {
+	.eccbytes = 64,
+	.eccpos = {
+		8, 9, 10, 11, 12, 13, 14, 15,
+		16, 17, 18, 19, 20, 21, 22, 23,
+		24, 25, 26, 27, 28, 29, 30, 31,
+		32, 33, 34, 35, 36, 37, 38, 39,
+		40, 41, 42, 43, 44, 45, 46, 47,
+		48, 49, 50, 51, 52, 53, 54, 55,
+		56, 57, 58, 59, 60, 61, 62, 63,
+		64, 65, 66, 67, 68, 69, 70, 71,
+	},
+	.oobfree = { {2, 6}, {72, 56} },
+};
+
+/*
+ * 4096-byte page size with 8-bit ECC -- requires 218-byte OOB.
+ * ECC lives in OOB bytes 8..135 (16 bytes for each of the eight 512-byte
+ * sectors); bytes 2..7 and 136..217 are free.
+ */
+static struct nand_ecclayout oob_4096_ecc8 = {
+	.eccbytes = 128,
+	.eccpos = {
+		8, 9, 10, 11, 12, 13, 14, 15,
+		16, 17, 18, 19, 20, 21, 22, 23,
+		24, 25, 26, 27, 28, 29, 30, 31,
+		32, 33, 34, 35, 36, 37, 38, 39,
+		40, 41, 42, 43, 44, 45, 46, 47,
+		48, 49, 50, 51, 52, 53, 54, 55,
+		56, 57, 58, 59, 60, 61, 62, 63,
+		64, 65, 66, 67, 68, 69, 70, 71,
+		72, 73, 74, 75, 76, 77, 78, 79,
+		80, 81, 82, 83, 84, 85, 86, 87,
+		88, 89, 90, 91, 92, 93, 94, 95,
+		96, 97, 98, 99, 100, 101, 102, 103,
+		104, 105, 106, 107, 108, 109, 110, 111,
+		112, 113, 114, 115, 116, 117, 118, 119,
+		120, 121, 122, 123, 124, 125, 126, 127,
+		128, 129, 130, 131, 132, 133, 134, 135,
+	},
+	.oobfree = { {2, 6}, {136, 82} },
+};
+
+
+/*
+ * Generic flash bbt descriptors
+ *
+ * Signature bytes for the main bad block table and for its mirror copy
+ * (the mirror signature is the main one reversed).
+ */
+static u8 bbt_pattern[] = {'B', 'b', 't', '0' };
+static u8 mirror_pattern[] = {'1', 't', 'b', 'B' };
+
+/*
+ * Descriptor for the main bad block table, identified by bbt_pattern.
+ * .offs is changed to 0 by fsl_ifc_chip_init() for 8-bit 512-byte-page
+ * chips.
+ */
+static struct nand_bbt_descr bbt_main_descr = {
+	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE |
+		   NAND_BBT_2BIT | NAND_BBT_VERSION,
+	.offs =	2, /* 0 on 8-bit small page */
+	.len = 4,
+	.veroffs = 6,
+	.maxblocks = 4,
+	.pattern = bbt_pattern,
+};
+
+/*
+ * Descriptor for the mirror bad block table, identified by mirror_pattern.
+ * .offs is changed to 0 by fsl_ifc_chip_init() for 8-bit 512-byte-page
+ * chips.
+ */
+static struct nand_bbt_descr bbt_mirror_descr = {
+	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE |
+		   NAND_BBT_2BIT | NAND_BBT_VERSION,
+	.offs =	2, /* 0 on 8-bit small page */
+	.len = 4,
+	.veroffs = 6,
+	.maxblocks = 4,
+	.pattern = mirror_pattern,
+};
+
+/*
+ * Program the ROW0/COL0 address registers for @page_addr/@column and pick
+ * the IFC buffer that belongs to that page.  The page, the buffer address
+ * and the starting read/write index are recorded in ifc_nand_ctrl.
+ *
+ * @oob: non-zero to address the spare area; the index then starts past the
+ *       main data (column + writesize).
+ */
+static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_regs __iomem *ifc = priv->ctrl->regs;
+	u32 col0 = column;
+	int slot;
+
+	if (oob)
+		col0 |= IFC_NAND_COL_MS;
+
+	ifc_nand_ctrl->page = page_addr;
+
+	/* Program ROW0/COL0 */
+	out_be32(&ifc->ifc_nand.row0, page_addr);
+	out_be32(&ifc->ifc_nand.col0, col0);
+
+	/* each buffer slot is two page sizes wide */
+	slot = page_addr & priv->bufnum_mask;
+	ifc_nand_ctrl->addr = priv->vbase + slot * (mtd->writesize * 2);
+
+	/* OOB data sits in the second half of the buffer slot */
+	ifc_nand_ctrl->index = oob ? column + mtd->writesize : column;
+}
+
+/*
+ * Check whether the page held in IFC buffer slot @bufnum looks erased.
+ *
+ * @bufnum is a page buffer index: the slot address is computed the same way
+ * set_addr() does it (vbase + bufnum * 2 * writesize).
+ *
+ * Returns 1 when every word of the main area and every ECC byte listed in
+ * the chip's ECC layout reads back as all ones, 0 otherwise.
+ */
+static int is_blank(struct mtd_info *mtd, unsigned int bufnum)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	u8 __iomem *addr = priv->vbase + bufnum * (mtd->writesize * 2);
+	/* keep the __iomem address space on the cast (sparse-clean) */
+	u32 __iomem *mainarea = (u32 __iomem *)addr;
+	u8 __iomem *oob = addr + mtd->writesize;
+	int i;
+
+	for (i = 0; i < mtd->writesize / 4; i++) {
+		if (__raw_readl(&mainarea[i]) != 0xffffffff)
+			return 0;
+	}
+
+	/* only the ECC bytes of the spare area are inspected */
+	for (i = 0; i < chip->ecc.layout->eccbytes; i++) {
+		int pos = chip->ecc.layout->eccpos[i];
+
+		if (__raw_readb(&oob[pos]) != 0xff)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Evaluate the ECC status of one 512-byte sector after a page read.
+ *
+ * @eccstat: copies of the ECCSTAT registers; each 32-bit word carries the
+ *           status of four sectors, one per byte, first sector in the top
+ *           byte.
+ * @bufnum:  sector index (the caller passes
+ *           (page & bufnum_mask) * sectors-per-page + sector-in-page).
+ *
+ * Returns nonzero if the entire page is blank, 0 otherwise.  Corrected bit
+ * errors are added to mtd->ecc_stats.corrected; an uncorrectable error on a
+ * non-blank page sets IFC_NAND_EVTER_STAT_ECCER in ctrl->nand_stat.
+ */
+static int check_read_ecc(struct mtd_info *mtd, struct fsl_ifc_ctrl *ctrl,
+			  u32 *eccstat, unsigned int bufnum)
+{
+	u32 reg = eccstat[bufnum / 4];
+	int errors = (reg >> ((3 - bufnum % 4) * 8)) & 15;
+	unsigned int sectors_per_page = mtd->writesize / 512;
+
+	if (errors == 15) { /* uncorrectable */
+		/*
+		 * Blank pages fail hw ECC checks.  is_blank() addresses
+		 * whole page buffers, so convert the sector index back to
+		 * the page buffer index before calling it.
+		 */
+		if (is_blank(mtd, bufnum / sectors_per_page))
+			return 1;
+
+		/*
+		 * We disable ECCER reporting in hardware due to
+		 * erratum IFC-A002770 -- so report it now if we
+		 * see an uncorrectable error in ECCSTAT.
+		 */
+		ctrl->nand_stat |= IFC_NAND_EVTER_STAT_ECCER;
+	} else if (errors > 0) {
+		mtd->ecc_stats.corrected += errors;
+	}
+
+	return 0;
+}
+
+/*
+ * Execute the IFC NAND command that has been set up in FIR/FCR and wait for
+ * it to complete.
+ *
+ * The chip select is programmed from priv->bank, ctrl->nand_stat is cleared
+ * and the sequence is started.  The function then sleeps until nand_stat
+ * becomes non-zero (presumably set by the IFC interrupt handler -- confirm
+ * in fsl_ifc.c) or IFC_TIMEOUT_MSECS elapse.  Timeouts and reported errors
+ * are logged; nothing is returned, callers inspect ctrl->nand_stat.
+ *
+ * When a full-page ECC read was requested (eccread), the ECC status of each
+ * 512-byte sector of the page is evaluated afterwards.
+ */
+static void fsl_ifc_run_command(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
+	struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl;
+	struct fsl_ifc_regs __iomem *ifc = ctrl->regs;
+	u32 eccstat[4];
+	int i;
+
+	/* set the chip select for NAND Transaction */
+	out_be32(&ifc->ifc_nand.nand_csel, priv->bank << IFC_NAND_CSEL_SHIFT);
+
+	dev_vdbg(priv->dev,
+			"%s: fir0=%08x fcr0=%08x\n",
+			__func__,
+			in_be32(&ifc->ifc_nand.nand_fir0),
+			in_be32(&ifc->ifc_nand.nand_fcr0));
+
+	ctrl->nand_stat = 0;
+
+	/* start read/write seq */
+	out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT);
+
+	/* wait for command complete flag or timeout */
+	wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
+			   IFC_TIMEOUT_MSECS * HZ/1000);
+
+	/* nand_stat still zero: no event was reported before the timeout */
+	if (!ctrl->nand_stat)
+		dev_err(priv->dev, "Controller is not responding\n");
+	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_FTOER)
+		dev_err(priv->dev, "NAND Flash Timeout Error\n");
+	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_WPER)
+		dev_err(priv->dev, "NAND Flash Write Protect Error\n");
+
+	if (nctrl->eccread) {
+		/* ECC is tracked per 512-byte sector */
+		int bufperpage = mtd->writesize / 512;
+		int bufnum = (nctrl->page & priv->bufnum_mask) * bufperpage;
+		int bufnum_end = bufnum + bufperpage - 1;
+
+		/* each ECCSTAT register covers four sectors */
+		for (i = bufnum / 4; i <= bufnum_end / 4; i++)
+			eccstat[i] = in_be32(&ifc->ifc_nand.nand_eccstat[i]);
+
+		/* stop early once the page is known to be blank */
+		for (i = bufnum; i <= bufnum_end; i++) {
+			if (check_read_ecc(mtd, ctrl, eccstat, i))
+				break;
+		}
+
+		nctrl->eccread = 0;
+	}
+}
+
+/*
+ * Load the instruction (FIR0/FIR1) and command (FCR0) registers for a page
+ * read.  Pages larger than 512 bytes use READ0 + READSTART; 512-byte pages
+ * use a single command, READOOB when @oob is set and READ0 otherwise.
+ * The sequence is only programmed here, not started.
+ */
+static void fsl_ifc_do_read(struct nand_chip *chip,
+			    int oob,
+			    struct mtd_info *mtd)
+{
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_regs __iomem *ifc = priv->ctrl->regs;
+	u32 fir0, fcr0;
+
+	if (mtd->writesize > 512) {
+		/* large page: command, column, row, second command, read */
+		fir0 = (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+		       (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+		       (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+		       (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) |
+		       (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT);
+		fcr0 = (NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) |
+		       (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT);
+	} else {
+		/* small page: command, column, row, read */
+		fir0 = (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+		       (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+		       (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+		       (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT);
+		fcr0 = (oob ? NAND_CMD_READOOB : NAND_CMD_READ0) <<
+			IFC_NAND_FCR0_CMD0_SHIFT;
+	}
+
+	out_be32(&ifc->ifc_nand.nand_fir0, fir0);
+	out_be32(&ifc->ifc_nand.nand_fir1, 0x0);
+	out_be32(&ifc->ifc_nand.nand_fcr0, fcr0);
+}
+
+/*
+ * cmdfunc: translate a generic NAND command into IFC NAND Machine register
+ * programming and, for commands that execute immediately, run it.
+ *
+ * @command:   NAND_CMD_* opcode
+ * @column:    column within the page (used by READ0, READOOB and SEQIN)
+ * @page_addr: page number (used by READ0, READOOB, ERASE1 and SEQIN)
+ *
+ * ERASE1 and SEQIN only latch addresses/registers; the matching ERASE2 and
+ * PAGEPROG calls start the operation.  Unsupported commands are logged and
+ * otherwise ignored.
+ */
+static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
+			     int column, int page_addr)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
+	struct fsl_ifc_regs __iomem *ifc = ctrl->regs;
+
+	/* clear the read buffer */
+	ifc_nand_ctrl->read_bytes = 0;
+	/* PAGEPROG needs the index accumulated by write_buf since SEQIN */
+	if (command != NAND_CMD_PAGEPROG)
+		ifc_nand_ctrl->index = 0;
+
+	switch (command) {
+	/* READ0 read the entire buffer to use hardware ECC. */
+	case NAND_CMD_READ0:
+		out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+		set_addr(mtd, 0, page_addr, 0);
+
+		ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize;
+		ifc_nand_ctrl->index += column;
+
+		if (chip->ecc.mode == NAND_ECC_HW)
+			ifc_nand_ctrl->eccread = 1;
+
+		fsl_ifc_do_read(chip, 0, mtd);
+		fsl_ifc_run_command(mtd);
+		return;
+
+	/* READOOB reads only the OOB because no ECC is performed. */
+	case NAND_CMD_READOOB:
+		out_be32(&ifc->ifc_nand.nand_fbcr, mtd->oobsize - column);
+		set_addr(mtd, column, page_addr, 1);
+
+		ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize;
+
+		fsl_ifc_do_read(chip, 1, mtd);
+		fsl_ifc_run_command(mtd);
+
+		return;
+
+	/* READID must read all 8 possible bytes */
+	case NAND_CMD_READID:
+		out_be32(&ifc->ifc_nand.nand_fir0,
+				(IFC_FIR_OP_CMD0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				(IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
+				(IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT));
+		out_be32(&ifc->ifc_nand.nand_fcr0,
+				NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT);
+		/* 8 bytes for manuf, device and exts */
+		out_be32(&ifc->ifc_nand.nand_fbcr, 8);
+		ifc_nand_ctrl->read_bytes = 8;
+
+		set_addr(mtd, 0, 0, 0);
+		fsl_ifc_run_command(mtd);
+		return;
+
+	/* ERASE1 stores the block and page address */
+	case NAND_CMD_ERASE1:
+		set_addr(mtd, 0, page_addr, 0);
+		return;
+
+	/* ERASE2 uses the block and page address from ERASE1 */
+	case NAND_CMD_ERASE2:
+		out_be32(&ifc->ifc_nand.nand_fir0,
+			 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+			 (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT));
+
+		out_be32(&ifc->ifc_nand.nand_fcr0,
+			 (NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) |
+			 (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT));
+
+		out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+		ifc_nand_ctrl->read_bytes = 0;
+		fsl_ifc_run_command(mtd);
+		return;
+
+	/* SEQIN sets up the addr buffer and all registers except the length */
+	case NAND_CMD_SEQIN: {
+		u32 nand_fcr0;
+		ifc_nand_ctrl->column = column;
+		ifc_nand_ctrl->oob = 0;
+
+		if (mtd->writesize > 512) {
+			nand_fcr0 =
+				(NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD0_SHIFT) |
+				(NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT);
+
+			out_be32(&ifc->ifc_nand.nand_fir0,
+				 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+				 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+				 (IFC_FIR_OP_WBCD  << IFC_NAND_FIR0_OP3_SHIFT) |
+				 (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT));
+		} else {
+			/*
+			 * Small page: CMD0 is filled in below with the
+			 * pointer command selecting the target area.
+			 */
+			nand_fcr0 = ((NAND_CMD_PAGEPROG <<
+					IFC_NAND_FCR0_CMD1_SHIFT) |
+				    (NAND_CMD_SEQIN <<
+					IFC_NAND_FCR0_CMD2_SHIFT));
+
+			out_be32(&ifc->ifc_nand.nand_fir0,
+				 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				 (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) |
+				 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+				 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) |
+				 (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT));
+			out_be32(&ifc->ifc_nand.nand_fir1,
+				 (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT));
+
+			if (column >= mtd->writesize) {
+				/* OOB area --> READOOB */
+				column -= mtd->writesize;
+				nand_fcr0 |= NAND_CMD_READOOB <<
+						IFC_NAND_FCR0_CMD0_SHIFT;
+				ifc_nand_ctrl->oob = 1;
+			} else if (column < 256)
+				/* First 256 bytes --> READ0 */
+				nand_fcr0 |=
+				NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT;
+			else
+				/* Second 256 bytes --> READ1 */
+				nand_fcr0 |=
+				NAND_CMD_READ1 << IFC_NAND_FCR0_CMD0_SHIFT;
+		}
+
+		out_be32(&ifc->ifc_nand.nand_fcr0, nand_fcr0);
+		set_addr(mtd, column, page_addr, ifc_nand_ctrl->oob);
+		return;
+	}
+
+	/* PAGEPROG reuses all of the setup from SEQIN and adds the length */
+	case NAND_CMD_PAGEPROG:
+		if (ifc_nand_ctrl->oob)
+			out_be32(&ifc->ifc_nand.nand_fbcr,
+					ifc_nand_ctrl->index);
+		else
+			out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+
+		fsl_ifc_run_command(mtd);
+		return;
+
+	case NAND_CMD_STATUS:
+		out_be32(&ifc->ifc_nand.nand_fir0,
+				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				(IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT));
+		out_be32(&ifc->ifc_nand.nand_fcr0,
+				NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT);
+		out_be32(&ifc->ifc_nand.nand_fbcr, 1);
+		set_addr(mtd, 0, 0, 0);
+		ifc_nand_ctrl->read_bytes = 1;
+
+		fsl_ifc_run_command(mtd);
+
+		/*
+		 * The chip always seems to report that it is
+		 * write-protected, even when it is not.
+		 */
+		setbits8(ifc_nand_ctrl->addr, NAND_STATUS_WP);
+		return;
+
+	case NAND_CMD_RESET:
+		out_be32(&ifc->ifc_nand.nand_fir0,
+				IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT);
+		out_be32(&ifc->ifc_nand.nand_fcr0,
+				NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT);
+		fsl_ifc_run_command(mtd);
+		return;
+
+	default:
+		dev_err(priv->dev, "%s: error, unsupported command 0x%x.\n",
+					__func__, command);
+	}
+}
+
+/*
+ * select_chip callback.  Intentionally a no-op: the hardware does not seem
+ * to support multiple chips per bank.
+ */
+static void fsl_ifc_select_chip(struct mtd_info *mtd, int chip)
+{
+}
+
+/*
+ * Write buf to the IFC NAND Controller Data Buffer
+ *
+ * Copies @len bytes from @buf into the current IFC buffer at the running
+ * index and advances the index.  A non-positive @len is rejected with an
+ * error message; a request that would run past the page + OOB area is
+ * logged and truncated to the space that is left.
+ */
+static void fsl_ifc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	unsigned int bufsize = mtd->writesize + mtd->oobsize;
+
+	if (len <= 0) {
+		dev_err(priv->dev, "%s: len %d bytes\n", __func__, len);
+		return;
+	}
+
+	if ((unsigned int)len > bufsize - ifc_nand_ctrl->index) {
+		dev_err(priv->dev,
+			"%s: beyond end of buffer (%d requested, %u available)\n",
+			__func__, len, bufsize - ifc_nand_ctrl->index);
+		len = bufsize - ifc_nand_ctrl->index;
+	}
+
+	memcpy_toio(&ifc_nand_ctrl->addr[ifc_nand_ctrl->index], buf, len);
+	ifc_nand_ctrl->index += len;
+}
+
+/*
+ * read_byte callback for an 8-bit bus: return the next unread byte of the
+ * IFC hardware buffer and advance the index.  Once the data of the last
+ * command is exhausted an error is logged and ERR_BYTE is returned.
+ */
+static uint8_t fsl_ifc_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl;
+
+	if (nctrl->index >= nctrl->read_bytes) {
+		dev_err(priv->dev, "%s: beyond end of buffer\n", __func__);
+		return ERR_BYTE;
+	}
+
+	return in_8(&nctrl->addr[nctrl->index++]);
+}
+
+/*
+ * Read two bytes from the IFC hardware buffer
+ * read function for 16-bit buswidth
+ *
+ * A big-endian 16-bit word is fetched at the current index, the index
+ * advances by two and the low byte of the word is returned.  Once the data
+ * of the last command is exhausted an error is logged and ERR_BYTE is
+ * returned.
+ */
+static uint8_t fsl_ifc_read_byte16(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	uint16_t data;
+
+	/*
+	 * If there are still bytes in the IFC buffer, then use the
+	 * next byte.
+	 */
+	if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes) {
+		/* keep the __iomem address space on the cast (sparse-clean) */
+		data = in_be16((uint16_t __iomem *)&ifc_nand_ctrl->
+					addr[ifc_nand_ctrl->index]);
+		ifc_nand_ctrl->index += 2;
+		return (uint8_t) data;
+	}
+
+	dev_err(priv->dev, "%s: beyond end of buffer\n", __func__);
+	return ERR_BYTE;
+}
+
+/*
+ * Read from the IFC Controller Data Buffer
+ *
+ * Copies up to @len bytes from the current IFC buffer, starting at the
+ * running index, into @buf and advances the index by the amount copied.
+ * A negative @len is rejected; a request for more than the last command
+ * left available copies what is there and logs an error.
+ */
+static void fsl_ifc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	int avail;
+
+	if (len < 0) {
+		dev_err(priv->dev, "%s: len %d bytes\n", __func__, len);
+		return;
+	}
+
+	avail = min((unsigned int)len,
+			ifc_nand_ctrl->read_bytes - ifc_nand_ctrl->index);
+	memcpy_fromio(buf, &ifc_nand_ctrl->addr[ifc_nand_ctrl->index], avail);
+	ifc_nand_ctrl->index += avail;
+
+	if (len > avail)
+		dev_err(priv->dev,
+			"%s: beyond end of buffer (%d requested, %d available)\n",
+			__func__, len, avail);
+}
+
+/*
+ * Verify buffer against the IFC Controller Data Buffer
+ *
+ * Compares @len bytes of @buf with the IFC buffer at the running index and
+ * advances the index by @len.
+ *
+ * Returns 0 when the data matches and the last command completed with
+ * exactly IFC_NAND_EVTER_STAT_OPC, -EINVAL for a negative or over-long
+ * @len, and -EIO on a mismatch or any other command status.
+ */
+static int fsl_ifc_verify_buf(struct mtd_info *mtd,
+			       const u_char *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
+	struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl;
+	int i;
+
+	if (len < 0) {
+		dev_err(priv->dev, "%s: len %d bytes\n", __func__, len);
+		return -EINVAL;
+	}
+
+	if ((unsigned int)len > nctrl->read_bytes - nctrl->index) {
+		dev_err(priv->dev,
+			"%s: beyond end of buffer (%d requested, %u available)\n",
+			__func__, len, nctrl->read_bytes - nctrl->index);
+
+		/* mark the buffer as fully consumed */
+		nctrl->index = nctrl->read_bytes;
+		return -EINVAL;
+	}
+
+	for (i = 0; i < len; i++)
+		if (in_8(&nctrl->addr[nctrl->index + i]) != buf[i])
+			break;
+
+	nctrl->index += len;
+
+	if (i != len)
+		return -EIO;
+	if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * waitfunc callback, called after Program and Erase operations to check
+ * for success or failure.
+ *
+ * Issues a READ_STATUS sequence that waits for the device to be ready and
+ * returns the contents of the NAND flash status register with
+ * NAND_STATUS_WP forced on.
+ */
+static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
+{
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_regs __iomem *ifc = priv->ctrl->regs;
+	u32 fir0 = (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+		   (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT);
+	u32 fcr0 = NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT;
+
+	out_be32(&ifc->ifc_nand.nand_fir0, fir0);
+	out_be32(&ifc->ifc_nand.nand_fcr0, fcr0);
+	out_be32(&ifc->ifc_nand.nand_fbcr, 1);
+	set_addr(mtd, 0, 0, 0);
+	ifc_nand_ctrl->read_bytes = 1;
+
+	fsl_ifc_run_command(mtd);
+
+	/*
+	 * The chip always seems to report that it is
+	 * write-protected, even when it is not.
+	 */
+	return in_be32(&ifc->ifc_nand.nand_fsr) | NAND_STATUS_WP;
+}
+
+/*
+ * ecc.read_page callback: copy the page data and the OOB area out of the
+ * IFC buffer into @buf and chip->oob_poi.
+ *
+ * The status left in ctrl->nand_stat by the preceding command decides
+ * whether the read counts as failed: anything other than a plain
+ * "operation complete" bumps mtd->ecc_stats.failed.  Always returns 0.
+ */
+static int fsl_ifc_read_page(struct mtd_info *mtd,
+			      struct nand_chip *chip,
+			      uint8_t *buf, int page)
+{
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
+
+	fsl_ifc_read_buf(mtd, buf, mtd->writesize);
+	fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	/* timeout and write-protect errors are logged by run_command */
+	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER)
+		dev_err(priv->dev, "NAND Flash ECC Uncorrectable Error\n");
+
+	if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
+		mtd->ecc_stats.failed++;
+
+	return 0;
+}
+
+/*
+ * ecc.write_page callback: stage the page data followed by the OOB bytes in
+ * the IFC buffer.  ECC will be calculated automatically, and errors will be
+ * detected in waitfunc.
+ */
+static void fsl_ifc_write_page(struct mtd_info *mtd,
+				struct nand_chip *chip,
+				const uint8_t *buf)
+{
+	/* main area first ... */
+	fsl_ifc_write_buf(mtd, buf, mtd->writesize);
+	/* ... then the spare area right behind it */
+	fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+}
+
+/*
+ * Dump the chip and mtd geometry through dev_dbg.  Called by the probe
+ * routine between nand_scan_ident() and nand_scan_tail().  Always returns 0.
+ */
+static int fsl_ifc_chip_init_tail(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct fsl_ifc_mtd *priv = chip->priv;
+	struct device *dev = priv->dev;
+
+	/* nand_chip fields */
+	dev_dbg(dev, "%s: nand->numchips = %d\n", __func__, chip->numchips);
+	dev_dbg(dev, "%s: nand->chipsize = %lld\n", __func__, chip->chipsize);
+	dev_dbg(dev, "%s: nand->pagemask = %8x\n", __func__, chip->pagemask);
+	dev_dbg(dev, "%s: nand->chip_delay = %d\n", __func__,
+		chip->chip_delay);
+	dev_dbg(dev, "%s: nand->badblockpos = %d\n", __func__,
+		chip->badblockpos);
+	dev_dbg(dev, "%s: nand->chip_shift = %d\n", __func__,
+		chip->chip_shift);
+	dev_dbg(dev, "%s: nand->page_shift = %d\n", __func__,
+		chip->page_shift);
+	dev_dbg(dev, "%s: nand->phys_erase_shift = %d\n", __func__,
+		chip->phys_erase_shift);
+	dev_dbg(dev, "%s: nand->ecclayout = %p\n", __func__, chip->ecclayout);
+
+	/* ECC configuration */
+	dev_dbg(dev, "%s: nand->ecc.mode = %d\n", __func__, chip->ecc.mode);
+	dev_dbg(dev, "%s: nand->ecc.steps = %d\n", __func__, chip->ecc.steps);
+	dev_dbg(dev, "%s: nand->ecc.bytes = %d\n", __func__, chip->ecc.bytes);
+	dev_dbg(dev, "%s: nand->ecc.total = %d\n", __func__, chip->ecc.total);
+	dev_dbg(dev, "%s: nand->ecc.layout = %p\n", __func__,
+		chip->ecc.layout);
+
+	/* mtd_info fields */
+	dev_dbg(dev, "%s: mtd->flags = %08x\n", __func__, mtd->flags);
+	dev_dbg(dev, "%s: mtd->size = %lld\n", __func__, mtd->size);
+	dev_dbg(dev, "%s: mtd->erasesize = %d\n", __func__, mtd->erasesize);
+	dev_dbg(dev, "%s: mtd->writesize = %d\n", __func__, mtd->writesize);
+	dev_dbg(dev, "%s: mtd->oobsize = %d\n", __func__, mtd->oobsize);
+
+	return 0;
+}
+
+/*
+ * Fill in the mtd_info and nand_chip structures embedded in @priv before
+ * the NAND core scans the chip.
+ *
+ * The bus width is taken from the bank's CSPR register; the page size and
+ * ECC settings are taken from its CSOR register and select the OOB layout,
+ * the buffer-number mask and the ECC mode.
+ *
+ * Returns 0 on success or -ENODEV when CSOR holds an unknown page size.
+ */
+static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
+{
+	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
+	struct fsl_ifc_regs __iomem *ifc = ctrl->regs;
+	struct nand_chip *chip = &priv->chip;
+	struct nand_ecclayout *layout;
+	u32 csor;
+
+	/* Fill in fsl_ifc_mtd structure */
+	priv->mtd.priv = chip;
+	priv->mtd.owner = THIS_MODULE;
+
+	/* fill in nand_chip structure */
+	/* set up function call table */
+	chip->write_buf = fsl_ifc_write_buf;
+	chip->read_buf = fsl_ifc_read_buf;
+	chip->verify_buf = fsl_ifc_verify_buf;
+	chip->select_chip = fsl_ifc_select_chip;
+	chip->cmdfunc = fsl_ifc_cmdfunc;
+	chip->waitfunc = fsl_ifc_wait;
+
+	chip->bbt_td = &bbt_main_descr;
+	chip->bbt_md = &bbt_mirror_descr;
+
+	out_be32(&ifc->ifc_nand.ncfgr, 0x0);
+
+	/* set up nand options */
+	chip->options = NAND_NO_READRDY | NAND_NO_AUTOINCR |
+			NAND_USE_FLASH_BBT;
+
+	/* the port size decides both the read_byte helper and the bus flag */
+	if (in_be32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) {
+		chip->read_byte = fsl_ifc_read_byte16;
+		chip->options |= NAND_BUSWIDTH_16;
+	} else {
+		chip->read_byte = fsl_ifc_read_byte;
+	}
+
+	chip->controller = &ifc_nand_ctrl->controller;
+	chip->priv = priv;
+
+	chip->ecc.read_page = fsl_ifc_read_page;
+	chip->ecc.write_page = fsl_ifc_write_page;
+
+	csor = in_be32(&ifc->csor_cs[priv->bank].csor);
+
+	/* Hardware generates ECC per 512 Bytes */
+	chip->ecc.size = 512;
+	chip->ecc.bytes = 8;
+
+	switch (csor & CSOR_NAND_PGS_MASK) {
+	case CSOR_NAND_PGS_512:
+		if (chip->options & NAND_BUSWIDTH_16) {
+			layout = &oob_512_16bit_ecc4;
+		} else {
+			layout = &oob_512_8bit_ecc4;
+
+			/* Avoid conflict with bad block marker */
+			bbt_main_descr.offs = 0;
+			bbt_mirror_descr.offs = 0;
+		}
+
+		priv->bufnum_mask = 15;
+		break;
+
+	case CSOR_NAND_PGS_2K:
+		layout = &oob_2048_ecc4;
+		priv->bufnum_mask = 3;
+		break;
+
+	case CSOR_NAND_PGS_4K:
+		if ((csor & CSOR_NAND_ECC_MODE_MASK) ==
+		    CSOR_NAND_ECC_MODE_4) {
+			layout = &oob_4096_ecc4;
+		} else {
+			/* 8-bit ECC needs 16 ECC bytes per 512-byte step */
+			layout = &oob_4096_ecc8;
+			chip->ecc.bytes = 16;
+		}
+
+		priv->bufnum_mask = 1;
+		break;
+
+	default:
+		dev_err(priv->dev, "bad csor %#x: bad page size\n", csor);
+		return -ENODEV;
+	}
+
+	/* Must also set CSOR_NAND_ECC_ENC_EN if DEC_EN set */
+	if (csor & CSOR_NAND_ECC_DEC_EN) {
+		chip->ecc.mode = NAND_ECC_HW;
+		chip->ecc.layout = layout;
+	} else {
+		chip->ecc.mode = NAND_ECC_SOFT;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down one chip: release the NAND device, free its name, unmap the
+ * chip-select window if it was mapped, clear the controller's slot and the
+ * driver data, and free @priv.  Always returns 0.
+ */
+static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv)
+{
+	struct device *owner = priv->dev;
+	int slot = priv->bank;
+
+	nand_release(&priv->mtd);
+	kfree(priv->mtd.name);
+
+	if (priv->vbase != NULL)
+		iounmap(priv->vbase);
+
+	/* drop every reference to priv before it is freed */
+	ifc_nand_ctrl->chips[slot] = NULL;
+	dev_set_drvdata(owner, NULL);
+	kfree(priv);
+
+	return 0;
+}
+
+/*
+ * Return non-zero when chip-select @bank is valid, is assigned to the NAND
+ * machine and has a base address equal to the converted @addr.
+ */
+static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank,
+		      phys_addr_t addr)
+{
+	u32 cspr = in_be32(&ifc->cspr_cs[bank].cspr);
+
+	if ((cspr & CSPR_V) && (cspr & CSPR_MSEL) == CSPR_MSEL_NAND)
+		return (cspr & CSPR_BA) == convert_ifc_address(addr);
+
+	return 0;
+}
+
+static DEFINE_MUTEX(fsl_ifc_nand_mutex);
+
+/*
+ * Probe one "fsl,ifc-nand" device node.
+ *
+ * Finds the chip select whose CSPR matches the node's first address
+ * resource, allocates the per-chip state, creates the shared NAND
+ * controller state on first use (taking one reference per chip in
+ * 'counter'), maps the chip-select window, enables the NAND event and
+ * interrupt sources, scans the chip and registers the MTD device with
+ * partitions from the command line/RedBoot or, failing that, the device
+ * tree.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * here is released again.
+ */
+static int __devinit fsl_ifc_nand_probe(struct platform_device *dev)
+{
+	struct fsl_ifc_regs __iomem *ifc;
+	struct fsl_ifc_mtd *priv;
+	struct resource res;
+	static const char *part_probe_types[]
+		= { "cmdlinepart", "RedBoot", NULL };
+	struct mtd_partition *parts;
+	int ret;
+	int bank;
+	struct device_node *node = dev->dev.of_node;
+
+	if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs)
+		return -ENODEV;
+	ifc = fsl_ifc_ctrl_dev->regs;
+
+	/* get, allocate and map the memory resource */
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret) {
+		dev_err(&dev->dev, "%s: failed to get resource\n", __func__);
+		return ret;
+	}
+
+	/* find which chip select it is connected to */
+	for (bank = 0; bank < FSL_IFC_BANK_COUNT; bank++) {
+		if (match_bank(ifc, bank, res.start))
+			break;
+	}
+
+	if (bank >= FSL_IFC_BANK_COUNT) {
+		dev_err(&dev->dev, "%s: address did not match any chip selects\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	mutex_lock(&fsl_ifc_nand_mutex);
+	if (!fsl_ifc_ctrl_dev->nand) {
+		ifc_nand_ctrl = kzalloc(sizeof(*ifc_nand_ctrl), GFP_KERNEL);
+		if (!ifc_nand_ctrl) {
+			dev_err(&dev->dev, "failed to allocate memory\n");
+			mutex_unlock(&fsl_ifc_nand_mutex);
+			/* priv is not tracked anywhere yet: free it here */
+			kfree(priv);
+			return -ENOMEM;
+		}
+
+		ifc_nand_ctrl->read_bytes = 0;
+		ifc_nand_ctrl->index = 0;
+		ifc_nand_ctrl->addr = NULL;
+		fsl_ifc_ctrl_dev->nand = ifc_nand_ctrl;
+
+		spin_lock_init(&ifc_nand_ctrl->controller.lock);
+		init_waitqueue_head(&ifc_nand_ctrl->controller.wq);
+	} else {
+		ifc_nand_ctrl = fsl_ifc_ctrl_dev->nand;
+	}
+	/*
+	 * One reference per chip.  fsl_ifc_nand_remove() drops it and frees
+	 * the shared state when the count reaches zero.
+	 */
+	ifc_nand_ctrl->counter++;
+	mutex_unlock(&fsl_ifc_nand_mutex);
+
+	ifc_nand_ctrl->chips[bank] = priv;
+	priv->bank = bank;
+	priv->ctrl = fsl_ifc_ctrl_dev;
+	priv->dev = &dev->dev;
+
+	priv->vbase = ioremap(res.start, resource_size(&res));
+	if (!priv->vbase) {
+		dev_err(priv->dev, "%s: failed to map chip region\n", __func__);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	dev_set_drvdata(priv->dev, priv);
+
+	out_be32(&ifc->ifc_nand.nand_evter_en,
+			IFC_NAND_EVTER_EN_OPC_EN |
+			IFC_NAND_EVTER_EN_FTOER_EN |
+			IFC_NAND_EVTER_EN_WPER_EN);
+
+	/* enable NAND Machine Interrupts */
+	out_be32(&ifc->ifc_nand.nand_evter_intr_en,
+			IFC_NAND_EVTER_INTR_OPCIR_EN |
+			IFC_NAND_EVTER_INTR_FTOERIR_EN |
+			IFC_NAND_EVTER_INTR_WPERIR_EN);
+
+	priv->mtd.name = kasprintf(GFP_KERNEL, "%x.flash", (unsigned)res.start);
+	if (!priv->mtd.name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = fsl_ifc_chip_init(priv);
+	if (ret)
+		goto err;
+
+	ret = nand_scan_ident(&priv->mtd, 1, NULL);
+	if (ret)
+		goto err;
+
+	ret = fsl_ifc_chip_init_tail(&priv->mtd);
+	if (ret)
+		goto err;
+
+	ret = nand_scan_tail(&priv->mtd);
+	if (ret)
+		goto err;
+
+	/* First look for RedBoot table or partitions on the command
+	 * line, these take precedence over device tree information */
+	ret = parse_mtd_partitions(&priv->mtd, part_probe_types, &parts, 0);
+	if (ret < 0)
+		goto err;
+
+	if (ret == 0) {
+		ret = of_mtd_parse_partitions(priv->dev, node, &parts);
+		if (ret < 0)
+			goto err;
+	}
+	mtd_device_register(&priv->mtd, parts, ret);
+
+	dev_info(priv->dev, "IFC NAND device at 0x%llx, bank %d\n",
+		 (unsigned long long)res.start, priv->bank);
+	return 0;
+
+err:
+	fsl_ifc_chip_remove(priv);
+
+	/* drop the reference taken above; free shared state with last user */
+	mutex_lock(&fsl_ifc_nand_mutex);
+	ifc_nand_ctrl->counter--;
+	if (!ifc_nand_ctrl->counter) {
+		fsl_ifc_ctrl_dev->nand = NULL;
+		kfree(ifc_nand_ctrl);
+	}
+	mutex_unlock(&fsl_ifc_nand_mutex);
+	return ret;
+}
+
+/*
+ * Platform remove callback: tear down the chip bound to @dev, then drop its
+ * count on the shared NAND controller state and free that state once the
+ * count reaches zero.  Always returns 0.
+ */
+static int fsl_ifc_nand_remove(struct platform_device *dev)
+{
+	fsl_ifc_chip_remove(dev_get_drvdata(&dev->dev));
+
+	mutex_lock(&fsl_ifc_nand_mutex);
+	if (--ifc_nand_ctrl->counter == 0) {
+		/* last user gone: unpublish and free the shared state */
+		fsl_ifc_ctrl_dev->nand = NULL;
+		kfree(ifc_nand_ctrl);
+	}
+	mutex_unlock(&fsl_ifc_nand_mutex);
+
+	return 0;
+}
+
+/* Device tree match table: binds to nodes compatible with "fsl,ifc-nand". */
+static const struct of_device_id fsl_ifc_nand_match[] = {
+	{
+		.compatible = "fsl,ifc-nand",
+	},
+	{}	/* sentinel */
+};
+
+/*
+ * Platform driver glue: devices are matched through fsl_ifc_nand_match and
+ * handled by fsl_ifc_nand_probe()/fsl_ifc_nand_remove().
+ */
+static struct platform_driver fsl_ifc_nand_driver = {
+	.driver = {
+		.name	= "fsl,ifc-nand",
+		.owner = THIS_MODULE,
+		.of_match_table = fsl_ifc_nand_match,
+	},
+	.probe       = fsl_ifc_nand_probe,
+	.remove      = fsl_ifc_nand_remove,
+};
+
+/*
+ * Module entry point: register the platform driver.
+ * Returns the result of platform_driver_register(); a failure is logged.
+ */
+static int __init fsl_ifc_nand_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&fsl_ifc_nand_driver);
+	if (ret)
+		/* single literal: split strings had lost the space */
+		printk(KERN_ERR "fsl-ifc: Failed to register platform driver\n");
+
+	return ret;
+}
+
+/* Module exit point: unregister the platform driver again. */
+static void __exit fsl_ifc_nand_exit(void)
+{
+	platform_driver_unregister(&fsl_ifc_nand_driver);
+}
+
+module_init(fsl_ifc_nand_init);
+module_exit(fsl_ifc_nand_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Freescale");
+MODULE_DESCRIPTION("Freescale Integrated Flash Controller MTD NAND driver");
-- 
1.7.1

^ permalink raw reply related

* [PATCH v2] Integrated Flash Controller support
From: b35362 @ 2011-10-31  9:38 UTC (permalink / raw)
  To: dwmw2, Artem.Bityutskiy
  Cc: r58472, linux-kernel, linux-mtd, scottwood, akpm, linuxppc-dev

From: Liu Shuo <b35362@freescale.com>

The Integrated Flash Controller (IFC) supports various flash devices such
as NOR and NAND, as well as other devices, using the NOR, NAND and GPCM
machines available on it. The IFC supports four chip selects.

Signed-off-by: Dipen Dudhat <Dipen.Dudhat@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Liu Shuo <b35362@freescale.com>
---
 arch/powerpc/Kconfig               |    4 +
 arch/powerpc/include/asm/fsl_ifc.h |  834 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/sysdev/Makefile       |    1 +
 arch/powerpc/sysdev/fsl_ifc.c      |  322 ++++++++++++++
 4 files changed, 1161 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/include/asm/fsl_ifc.h
 create mode 100644 arch/powerpc/sysdev/fsl_ifc.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f8e578b..3cd1e64 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -688,6 +688,10 @@ config FSL_LBC
 	  controller.  Also contains some common code used by
 	  drivers for specific local bus peripherals.
 
+config FSL_IFC
+	bool
+	depends on FSL_SOC
+
 config FSL_GTM
 	bool
 	depends on PPC_83xx || QUICC_ENGINE || CPM2
diff --git a/arch/powerpc/include/asm/fsl_ifc.h b/arch/powerpc/include/asm/fsl_ifc.h
new file mode 100644
index 0000000..b955012
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_ifc.h
@@ -0,0 +1,834 @@
+/* Freescale Integrated Flash Controller
+ *
+ * Copyright 2011 Freescale Semiconductor, Inc
+ *
+ * Author: Dipen Dudhat <dipen.dudhat@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __ASM_FSL_IFC_H
+#define __ASM_FSL_IFC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+
+#define FSL_IFC_BANK_COUNT 4
+
+/*
+ * CSPR - Chip Select Property Register
+ */
+#define CSPR_BA				0xFFFF0000
+#define CSPR_BA_SHIFT			16
+#define CSPR_PORT_SIZE			0x00000180
+#define CSPR_PORT_SIZE_SHIFT		7
+/* Port Size 8 bit */
+#define CSPR_PORT_SIZE_8		0x00000080
+/* Port Size 16 bit */
+#define CSPR_PORT_SIZE_16		0x00000100
+/* Port Size 32 bit */
+#define CSPR_PORT_SIZE_32		0x00000180
+/* Write Protect */
+#define CSPR_WP				0x00000040
+#define CSPR_WP_SHIFT			6
+/* Machine Select */
+#define CSPR_MSEL			0x00000006
+#define CSPR_MSEL_SHIFT			1
+/* NOR */
+#define CSPR_MSEL_NOR			0x00000000
+/* NAND */
+#define CSPR_MSEL_NAND			0x00000002
+/* GPCM */
+#define CSPR_MSEL_GPCM			0x00000004
+/* Bank Valid */
+#define CSPR_V				0x00000001
+#define CSPR_V_SHIFT			0
+
+/*
+ * Address Mask Register
+ */
+#define IFC_AMASK_MASK			0xFFFF0000
+#define IFC_AMASK_SHIFT			16
+#define IFC_AMASK(n)			(IFC_AMASK_MASK << \
+					(__ilog2(n) - IFC_AMASK_SHIFT))
+
+/*
+ * Chip Select Option Register IFC_NAND Machine
+ */
+/* Enable ECC Encoder */
+#define CSOR_NAND_ECC_ENC_EN		0x80000000
+#define CSOR_NAND_ECC_MODE_MASK		0x30000000
+/* 4 bit correction per 520 Byte sector */
+#define CSOR_NAND_ECC_MODE_4		0x00000000
+/* 8 bit correction per 528 Byte sector */
+#define CSOR_NAND_ECC_MODE_8		0x10000000
+/* Enable ECC Decoder */
+#define CSOR_NAND_ECC_DEC_EN		0x04000000
+/* Row Address Length. NOTE(review): field starts at bit 23; SHIFT is 20? */
+#define CSOR_NAND_RAL_MASK		0x01800000
+#define CSOR_NAND_RAL_SHIFT		20
+#define CSOR_NAND_RAL_1			0x00000000
+#define CSOR_NAND_RAL_2			0x00800000
+#define CSOR_NAND_RAL_3			0x01000000
+#define CSOR_NAND_RAL_4			0x01800000
+/* Page Size 512b, 2k, 4k. NOTE(review): field is at bit 19; SHIFT is 16? */
+#define CSOR_NAND_PGS_MASK		0x00180000
+#define CSOR_NAND_PGS_SHIFT		16
+#define CSOR_NAND_PGS_512		0x00000000
+#define CSOR_NAND_PGS_2K		0x00080000
+#define CSOR_NAND_PGS_4K		0x00100000
+/* Spare region Size */
+#define CSOR_NAND_SPRZ_MASK		0x0000E000
+#define CSOR_NAND_SPRZ_SHIFT		13
+#define CSOR_NAND_SPRZ_16		0x00000000
+#define CSOR_NAND_SPRZ_64		0x00002000
+#define CSOR_NAND_SPRZ_128		0x00004000
+#define CSOR_NAND_SPRZ_210		0x00006000
+#define CSOR_NAND_SPRZ_218		0x00008000
+#define CSOR_NAND_SPRZ_224		0x0000A000
+/* Pages Per Block */
+#define CSOR_NAND_PB_MASK		0x00000700
+#define CSOR_NAND_PB_SHIFT		8
+#define CSOR_NAND_PB(n)		((__ilog2(n) - 5) << CSOR_NAND_PB_SHIFT)
+/* Time for Read Enable High to Output High Impedance */
+#define CSOR_NAND_TRHZ_MASK		0x0000001C
+#define CSOR_NAND_TRHZ_SHIFT		2
+#define CSOR_NAND_TRHZ_20		0x00000000
+#define CSOR_NAND_TRHZ_40		0x00000004
+#define CSOR_NAND_TRHZ_60		0x00000008
+#define CSOR_NAND_TRHZ_80		0x0000000C
+#define CSOR_NAND_TRHZ_100		0x00000010
+/* Buffer control disable */
+#define CSOR_NAND_BCTLD			0x00000001
+
+/*
+ * Chip Select Option Register - NOR Flash Mode
+ */
+/* Enable Address shift Mode */
+#define CSOR_NOR_ADM_SHFT_MODE_EN	0x80000000
+/* Page Read Enable from NOR device */
+#define CSOR_NOR_PGRD_EN		0x10000000
+/* AVD Toggle Enable during Burst Program */
+#define CSOR_NOR_AVD_TGL_PGM_EN		0x01000000
+/* Address Data Multiplexing Shift */
+#define CSOR_NOR_ADM_MASK		0x0003E000
+#define CSOR_NOR_ADM_SHIFT_SHIFT	13
+#define CSOR_NOR_ADM_SHIFT(n)	((n) << CSOR_NOR_ADM_SHIFT_SHIFT)
+/* Type of the NOR device hooked */
+#define CSOR_NOR_NOR_MODE_AYSNC_NOR	0x00000000
+#define CSOR_NOR_NOR_MODE_AVD_NOR	0x00000020
+/* Time for Read Enable High to Output High Impedance */
+#define CSOR_NOR_TRHZ_MASK		0x0000001C
+#define CSOR_NOR_TRHZ_SHIFT		2
+#define CSOR_NOR_TRHZ_20		0x00000000
+#define CSOR_NOR_TRHZ_40		0x00000004
+#define CSOR_NOR_TRHZ_60		0x00000008
+#define CSOR_NOR_TRHZ_80		0x0000000C
+#define CSOR_NOR_TRHZ_100		0x00000010
+/* Buffer control disable */
+#define CSOR_NOR_BCTLD			0x00000001
+
+/*
+ * Chip Select Option Register - GPCM Mode
+ */
+/* GPCM Mode - Normal */
+#define CSOR_GPCM_GPMODE_NORMAL		0x00000000
+/* GPCM Mode - GenericASIC */
+#define CSOR_GPCM_GPMODE_ASIC		0x80000000
+/* Parity Mode odd/even */
+#define CSOR_GPCM_PARITY_EVEN		0x40000000
+/* Parity Checking enable/disable */
+#define CSOR_GPCM_PAR_EN		0x20000000
+/* GPCM Timeout Count */
+#define CSOR_GPCM_GPTO_MASK		0x0F000000
+#define CSOR_GPCM_GPTO_SHIFT		24
+#define CSOR_GPCM_GPTO(n)	((__ilog2(n) - 8) << CSOR_GPCM_GPTO_SHIFT)
+/* GPCM External Access Termination mode for read access */
+#define CSOR_GPCM_RGETA_EXT		0x00080000
+/* GPCM External Access Termination mode for write access */
+#define CSOR_GPCM_WGETA_EXT		0x00040000
+/* Address Data Multiplexing Shift */
+#define CSOR_GPCM_ADM_MASK		0x0003E000
+#define CSOR_GPCM_ADM_SHIFT_SHIFT	13
+#define CSOR_GPCM_ADM_SHIFT(n)	((n) << CSOR_GPCM_ADM_SHIFT_SHIFT)
+/* Generic ASIC Parity error indication delay */
+#define CSOR_GPCM_GAPERRD_MASK		0x00000180
+#define CSOR_GPCM_GAPERRD_SHIFT		7
+#define CSOR_GPCM_GAPERRD(n)	(((n) - 1) << CSOR_GPCM_GAPERRD_SHIFT)
+/* Time for Read Enable High to Output High Impedance */
+#define CSOR_GPCM_TRHZ_MASK		0x0000001C
+#define CSOR_GPCM_TRHZ_20		0x00000000
+#define CSOR_GPCM_TRHZ_40		0x00000004
+#define CSOR_GPCM_TRHZ_60		0x00000008
+#define CSOR_GPCM_TRHZ_80		0x0000000C
+#define CSOR_GPCM_TRHZ_100		0x00000010
+/* Buffer control disable */
+#define CSOR_GPCM_BCTLD			0x00000001
+
+/*
+ * Ready Busy Status Register (RB_STAT)
+ */
+/* CSn is READY */
+#define IFC_RB_STAT_READY_CS0		0x80000000
+#define IFC_RB_STAT_READY_CS1		0x40000000
+#define IFC_RB_STAT_READY_CS2		0x20000000
+#define IFC_RB_STAT_READY_CS3		0x10000000
+
+/*
+ * General Control Register (GCR)
+ */
+#define IFC_GCR_MASK			0x8000F800
+/* reset all IFC hardware */
+#define IFC_GCR_SOFT_RST_ALL		0x80000000
+/* Turnaround Time of external buffer */
+#define IFC_GCR_TBCTL_TRN_TIME		0x0000F800
+#define IFC_GCR_TBCTL_TRN_TIME_SHIFT	11
+
+/*
+ * Common Event and Error Status Register (CM_EVTER_STAT)
+ */
+/* Chip select error */
+#define IFC_CM_EVTER_STAT_CSER		0x80000000
+
+/*
+ * Common Event and Error Enable Register (CM_EVTER_EN)
+ */
+/* Chip select error checking enable */
+#define IFC_CM_EVTER_EN_CSEREN		0x80000000
+
+/*
+ * Common Event and Error Interrupt Enable Register (CM_EVTER_INTR_EN)
+ */
+/* Chip select error interrupt enable */
+#define IFC_CM_EVTER_INTR_EN_CSERIREN	0x80000000
+
+/*
+ * Common Transfer Error Attribute Register-0 (CM_ERATTR0)
+ */
+/* transaction type of error Read/Write */
+#define IFC_CM_ERATTR0_ERTYP_READ	0x80000000
+#define IFC_CM_ERATTR0_ERAID		0x0FF00000
+#define IFC_CM_ERATTR0_ERAID_SHIFT	20
+#define IFC_CM_ERATTR0_ESRCID		0x0000FF00
+#define IFC_CM_ERATTR0_ESRCID_SHIFT	8
+
+/*
+ * Clock Control Register (CCR)
+ */
+#define IFC_CCR_MASK			0x0F0F8800
+/* Clock division ratio; IFC_CCR_CLK_DIV(n) stores n - 1 in the field */
+#define IFC_CCR_CLK_DIV_MASK		0x0F000000
+#define IFC_CCR_CLK_DIV_SHIFT		24
+#define IFC_CCR_CLK_DIV(n)		(((n) - 1) << IFC_CCR_CLK_DIV_SHIFT)
+/* IFC Clock Delay */
+#define IFC_CCR_CLK_DLY_MASK		0x000F0000
+#define IFC_CCR_CLK_DLY_SHIFT		16
+#define IFC_CCR_CLK_DLY(n)		((n) << IFC_CCR_CLK_DLY_SHIFT)
+/* Invert IFC clock before sending out */
+#define IFC_CCR_INV_CLK_EN		0x00008000
+/* Feedback IFC Clock */
+#define IFC_CCR_FB_IFC_CLK_SEL		0x00000800
+
+/*
+ * Clock Status Register (CSR)
+ */
+/* Clk is stable */
+#define IFC_CSR_CLK_STAT_STABLE		0x80000000
+
+/*
+ * IFC_NAND Machine Specific Registers
+ */
+/*
+ * NAND Configuration Register (NCFGR)
+ */
+/* Auto Boot Mode */
+#define IFC_NAND_NCFGR_BOOT		0x80000000
+/* Addressing Mode-ROW0+n/COL0 */
+#define IFC_NAND_NCFGR_ADDR_MODE_RC0	0x00000000
+/* Addressing Mode-ROW0+n/COL0+n */
+#define IFC_NAND_NCFGR_ADDR_MODE_RC1	0x00400000
+/* Number of loop iterations of FIR sequences for multi page operations */
+#define IFC_NAND_NCFGR_NUM_LOOP_MASK	0x0000F000
+#define IFC_NAND_NCFGR_NUM_LOOP_SHIFT	12
+#define IFC_NAND_NCFGR_NUM_LOOP(n)	((n) << IFC_NAND_NCFGR_NUM_LOOP_SHIFT)
+/* Number of wait cycles */
+#define IFC_NAND_NCFGR_NUM_WAIT_MASK	0x000000FF
+#define IFC_NAND_NCFGR_NUM_WAIT_SHIFT	0
+
+/*
+ * NAND Flash Command Registers (NAND_FCR0/NAND_FCR1)
+ */
+/* General purpose FCM flash command bytes CMD0-CMD7 */
+#define IFC_NAND_FCR0_CMD0		0xFF000000
+#define IFC_NAND_FCR0_CMD0_SHIFT	24
+#define IFC_NAND_FCR0_CMD1		0x00FF0000
+#define IFC_NAND_FCR0_CMD1_SHIFT	16
+#define IFC_NAND_FCR0_CMD2		0x0000FF00
+#define IFC_NAND_FCR0_CMD2_SHIFT	8
+#define IFC_NAND_FCR0_CMD3		0x000000FF
+#define IFC_NAND_FCR0_CMD3_SHIFT	0
+#define IFC_NAND_FCR1_CMD4		0xFF000000
+#define IFC_NAND_FCR1_CMD4_SHIFT	24
+#define IFC_NAND_FCR1_CMD5		0x00FF0000
+#define IFC_NAND_FCR1_CMD5_SHIFT	16
+#define IFC_NAND_FCR1_CMD6		0x0000FF00
+#define IFC_NAND_FCR1_CMD6_SHIFT	8
+#define IFC_NAND_FCR1_CMD7		0x000000FF
+#define IFC_NAND_FCR1_CMD7_SHIFT	0
+
+/*
+ * Flash ROW and COL Address Register (ROWn, COLn)
+ */
+/* Main/spare region locator */
+#define IFC_NAND_COL_MS			0x80000000
+/* Column Address */
+#define IFC_NAND_COL_CA_MASK		0x00000FFF
+
+/*
+ * NAND Flash Byte Count Register (NAND_BC)
+ */
+/* Byte Count for read/Write */
+#define IFC_NAND_BC			0x000001FF
+
+/*
+ * NAND Flash Instruction Registers (NAND_FIR0/NAND_FIR1/NAND_FIR2)
+ */
+/* NAND Machine specific opcodes OP0-OP14*/
+#define IFC_NAND_FIR0_OP0		0xFC000000
+#define IFC_NAND_FIR0_OP0_SHIFT		26
+#define IFC_NAND_FIR0_OP1		0x03F00000
+#define IFC_NAND_FIR0_OP1_SHIFT		20
+#define IFC_NAND_FIR0_OP2		0x000FC000
+#define IFC_NAND_FIR0_OP2_SHIFT		14
+#define IFC_NAND_FIR0_OP3		0x00003F00
+#define IFC_NAND_FIR0_OP3_SHIFT		8
+#define IFC_NAND_FIR0_OP4		0x000000FC
+#define IFC_NAND_FIR0_OP4_SHIFT		2
+#define IFC_NAND_FIR1_OP5		0xFC000000
+#define IFC_NAND_FIR1_OP5_SHIFT		26
+#define IFC_NAND_FIR1_OP6		0x03F00000
+#define IFC_NAND_FIR1_OP6_SHIFT		20
+#define IFC_NAND_FIR1_OP7		0x000FC000
+#define IFC_NAND_FIR1_OP7_SHIFT		14
+#define IFC_NAND_FIR1_OP8		0x00003F00
+#define IFC_NAND_FIR1_OP8_SHIFT		8
+#define IFC_NAND_FIR1_OP9		0x000000FC
+#define IFC_NAND_FIR1_OP9_SHIFT		2
+#define IFC_NAND_FIR2_OP10		0xFC000000
+#define IFC_NAND_FIR2_OP10_SHIFT	26
+#define IFC_NAND_FIR2_OP11		0x03F00000
+#define IFC_NAND_FIR2_OP11_SHIFT	20
+#define IFC_NAND_FIR2_OP12		0x000FC000
+#define IFC_NAND_FIR2_OP12_SHIFT	14
+#define IFC_NAND_FIR2_OP13		0x00003F00
+#define IFC_NAND_FIR2_OP13_SHIFT	8
+#define IFC_NAND_FIR2_OP14		0x000000FC
+#define IFC_NAND_FIR2_OP14_SHIFT	2
+
+/*
+ * Instruction opcodes to be programmed
+ * in FIR registers- 6bits
+ */
+enum ifc_nand_fir_opcodes {
+	IFC_FIR_OP_NOP,
+	IFC_FIR_OP_CA0,
+	IFC_FIR_OP_CA1,
+	IFC_FIR_OP_CA2,
+	IFC_FIR_OP_CA3,
+	IFC_FIR_OP_RA0,
+	IFC_FIR_OP_RA1,
+	IFC_FIR_OP_RA2,
+	IFC_FIR_OP_RA3,
+	IFC_FIR_OP_CMD0,
+	IFC_FIR_OP_CMD1,
+	IFC_FIR_OP_CMD2,
+	IFC_FIR_OP_CMD3,
+	IFC_FIR_OP_CMD4,
+	IFC_FIR_OP_CMD5,
+	IFC_FIR_OP_CMD6,
+	IFC_FIR_OP_CMD7,
+	IFC_FIR_OP_CW0,
+	IFC_FIR_OP_CW1,
+	IFC_FIR_OP_CW2,
+	IFC_FIR_OP_CW3,
+	IFC_FIR_OP_CW4,
+	IFC_FIR_OP_CW5,
+	IFC_FIR_OP_CW6,
+	IFC_FIR_OP_CW7,
+	IFC_FIR_OP_WBCD,
+	IFC_FIR_OP_RBCD,
+	IFC_FIR_OP_BTRD,
+	IFC_FIR_OP_RDSTAT,
+	IFC_FIR_OP_NWAIT,
+	IFC_FIR_OP_WFR,
+	IFC_FIR_OP_SBRD,
+	IFC_FIR_OP_UA,
+	IFC_FIR_OP_RB,
+};
+
+/*
+ * NAND Chip Select Register (NAND_CSEL)
+ */
+#define IFC_NAND_CSEL			0x0C000000
+#define IFC_NAND_CSEL_SHIFT		26
+#define IFC_NAND_CSEL_CS0		0x00000000
+#define IFC_NAND_CSEL_CS1		0x04000000
+#define IFC_NAND_CSEL_CS2		0x08000000
+#define IFC_NAND_CSEL_CS3		0x0C000000
+
+/*
+ * NAND Operation Sequence Start (NANDSEQ_STRT)
+ */
+/* NAND Flash Operation Start */
+#define IFC_NAND_SEQ_STRT_FIR_STRT	0x80000000
+/* Automatic Erase */
+#define IFC_NAND_SEQ_STRT_AUTO_ERS	0x00800000
+/* Automatic Program */
+#define IFC_NAND_SEQ_STRT_AUTO_PGM	0x00100000
+/* Automatic Copyback */
+#define IFC_NAND_SEQ_STRT_AUTO_CPB	0x00020000
+/* Automatic Read Operation */
+#define IFC_NAND_SEQ_STRT_AUTO_RD	0x00004000
+/* Automatic Status Read */
+#define IFC_NAND_SEQ_STRT_AUTO_STAT_RD	0x00000800
+
+/*
+ * NAND Event and Error Status Register (NAND_EVTER_STAT)
+ */
+/* Operation Complete */
+#define IFC_NAND_EVTER_STAT_OPC		0x80000000
+/* Flash Timeout Error */
+#define IFC_NAND_EVTER_STAT_FTOER	0x08000000
+/* Write Protect Error */
+#define IFC_NAND_EVTER_STAT_WPER	0x04000000
+/* ECC Error */
+#define IFC_NAND_EVTER_STAT_ECCER	0x02000000
+/* RCW Load Done */
+#define IFC_NAND_EVTER_STAT_RCW_DN	0x00008000
+/* Boot Loader Done */
+#define IFC_NAND_EVTER_STAT_BOOT_DN	0x00004000
+/* Bad Block Indicator search select */
+#define IFC_NAND_EVTER_STAT_BBI_SRCH_SE	0x00000800
+
+/*
+ * NAND Flash Page Read Completion Event Status Register
+ * (PGRDCMPL_EVT_STAT); unsigned constants so that bit 31 can be set safely
+ */
+#define PGRDCMPL_EVT_STAT_MASK		0xFFFF0000
+/* Small Page 0-15 Done */
+#define PGRDCMPL_EVT_STAT_SECTION_SP(n)	(1U << (31 - (n)))
+/* Large Page(2K) 0-3 Done */
+#define PGRDCMPL_EVT_STAT_LP_2K(n)	(0xFU << (28 - (n) * 4))
+/* Large Page(4K) 0-1 Done */
+#define PGRDCMPL_EVT_STAT_LP_4K(n)	(0xFFU << (24 - (n) * 8))
+
+/*
+ * NAND Event and Error Enable Register (NAND_EVTER_EN)
+ */
+/* Operation complete event enable */
+#define IFC_NAND_EVTER_EN_OPC_EN	0x80000000
+/* Page read complete event enable */
+#define IFC_NAND_EVTER_EN_PGRDCMPL_EN	0x20000000
+/* Flash Timeout error enable */
+#define IFC_NAND_EVTER_EN_FTOER_EN	0x08000000
+/* Write Protect error enable */
+#define IFC_NAND_EVTER_EN_WPER_EN	0x04000000
+/* ECC error logging enable */
+#define IFC_NAND_EVTER_EN_ECCER_EN	0x02000000
+
+/*
+ * NAND Event and Error Interrupt Enable Register (NAND_EVTER_INTR_EN)
+ */
+/* Enable interrupt for operation complete */
+#define IFC_NAND_EVTER_INTR_OPCIR_EN		0x80000000
+/* Enable interrupt for Page read complete */
+#define IFC_NAND_EVTER_INTR_PGRDCMPLIR_EN	0x20000000
+/* Enable interrupt for Flash timeout error */
+#define IFC_NAND_EVTER_INTR_FTOERIR_EN		0x08000000
+/* Enable interrupt for Write protect error */
+#define IFC_NAND_EVTER_INTR_WPERIR_EN		0x04000000
+/* Enable interrupt for ECC error*/
+#define IFC_NAND_EVTER_INTR_ECCERIR_EN		0x02000000
+
+/*
+ * NAND Transfer Error Attribute Register-0 (NAND_ERATTR0)
+ */
+#define IFC_NAND_ERATTR0_MASK		0x0C080000
+/* Error on CS0-3 for NAND */
+#define IFC_NAND_ERATTR0_ERCS_CS0	0x00000000
+#define IFC_NAND_ERATTR0_ERCS_CS1	0x04000000
+#define IFC_NAND_ERATTR0_ERCS_CS2	0x08000000
+#define IFC_NAND_ERATTR0_ERCS_CS3	0x0C000000
+/* Transaction type of error Read/Write */
+#define IFC_NAND_ERATTR0_ERTTYPE_READ	0x00080000
+
+/*
+ * NAND Flash Status Register (NAND_FSR)
+ */
+/* First byte of data read from read status op */
+#define IFC_NAND_NFSR_RS0		0xFF000000
+/* Second byte of data read from read status op */
+#define IFC_NAND_NFSR_RS1		0x00FF0000
+
+/*
+ * ECC Error Status Registers (ECCSTAT0-ECCSTAT3)
+ */
+/* Number of ECC errors on sector n (n = 0-15) */
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR0_MASK	0x0F000000
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR0_SHIFT	24
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR1_MASK	0x000F0000
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR1_SHIFT	16
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR2_MASK	0x00000F00
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR2_SHIFT	8
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR3_MASK	0x0000000F
+#define IFC_NAND_ECCSTAT0_ERRCNT_SECTOR3_SHIFT	0
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR4_MASK	0x0F000000
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR4_SHIFT	24
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR5_MASK	0x000F0000
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR5_SHIFT	16
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR6_MASK	0x00000F00
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR6_SHIFT	8
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR7_MASK	0x0000000F
+#define IFC_NAND_ECCSTAT1_ERRCNT_SECTOR7_SHIFT	0
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR8_MASK	0x0F000000
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR8_SHIFT	24
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR9_MASK	0x000F0000
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR9_SHIFT	16
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR10_MASK	0x00000F00
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR10_SHIFT	8
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR11_MASK	0x0000000F
+#define IFC_NAND_ECCSTAT2_ERRCNT_SECTOR11_SHIFT	0
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR12_MASK	0x0F000000
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR12_SHIFT	24
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR13_MASK	0x000F0000
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR13_SHIFT	16
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR14_MASK	0x00000F00
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR14_SHIFT	8
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR15_MASK	0x0000000F
+#define IFC_NAND_ECCSTAT3_ERRCNT_SECTOR15_SHIFT	0
+
+/*
+ * NAND Control Register (NANDCR); FTOCNT(n) stores __ilog2(n) - 8
+ */
+#define IFC_NAND_NCR_FTOCNT_MASK	0x1E000000
+#define IFC_NAND_NCR_FTOCNT_SHIFT	25
+#define IFC_NAND_NCR_FTOCNT(n)	((__ilog2(n) - 8) << IFC_NAND_NCR_FTOCNT_SHIFT)
+
+/*
+ * NAND_AUTOBOOT_TRGR
+ */
+/* Trigger RCW load */
+#define IFC_NAND_AUTOBOOT_TRGR_RCW_LD	0x80000000
+/* Trigger Auto Boot */
+#define IFC_NAND_AUTOBOOT_TRGR_BOOT_LD	0x20000000
+
+/*
+ * NAND_MDR
+ */
+/* 1st read data byte when opcode SBRD */
+#define IFC_NAND_MDR_RDATA0		0xFF000000
+/* 2nd read data byte when opcode SBRD */
+#define IFC_NAND_MDR_RDATA1		0x00FF0000
+
+/*
+ * NOR Machine Specific Registers
+ */
+/*
+ * NOR Event and Error Status Register (NOR_EVTER_STAT)
+ */
+/* NOR Command Sequence Operation Complete */
+#define IFC_NOR_EVTER_STAT_OPC_NOR	0x80000000
+/* Write Protect Error */
+#define IFC_NOR_EVTER_STAT_WPER		0x04000000
+/* Command Sequence Timeout Error */
+#define IFC_NOR_EVTER_STAT_STOER	0x01000000
+
+/*
+ * NOR Event and Error Enable Register (NOR_EVTER_EN)
+ */
+/* NOR Command Seq complete event enable */
+#define IFC_NOR_EVTER_EN_OPCEN_NOR	0x80000000
+/* Write Protect Error Checking Enable */
+#define IFC_NOR_EVTER_EN_WPEREN		0x04000000
+/* Timeout Error Enable */
+#define IFC_NOR_EVTER_EN_STOEREN	0x01000000
+
+/*
+ * NOR Event and Error Interrupt Enable Register (NOR_EVTER_INTR_EN)
+ */
+/* Enable interrupt for OPC complete */
+#define IFC_NOR_EVTER_INTR_OPCEN_NOR	0x80000000
+/* Enable interrupt for write protect error */
+#define IFC_NOR_EVTER_INTR_WPEREN	0x04000000
+/* Enable interrupt for timeout error */
+#define IFC_NOR_EVTER_INTR_STOEREN	0x01000000
+
+/*
+ * NOR Transfer Error Attribute Register-0 (NOR_ERATTR0)
+ */
+/* Source ID for error transaction */
+#define IFC_NOR_ERATTR0_ERSRCID		0xFF000000
+/* AXI ID for error transaction */
+#define IFC_NOR_ERATTR0_ERAID		0x000FF000
+/* Chip select corresponds to NOR error */
+#define IFC_NOR_ERATTR0_ERCS_CS0	0x00000000
+#define IFC_NOR_ERATTR0_ERCS_CS1	0x00000010
+#define IFC_NOR_ERATTR0_ERCS_CS2	0x00000020
+#define IFC_NOR_ERATTR0_ERCS_CS3	0x00000030
+/* Type of transaction read/write */
+#define IFC_NOR_ERATTR0_ERTYPE_READ	0x00000001
+
+/*
+ * NOR Transfer Error Attribute Register-2 (NOR_ERATTR2)
+ */
+#define IFC_NOR_ERATTR2_ER_NUM_PHASE_EXP	0x000F0000
+#define IFC_NOR_ERATTR2_ER_NUM_PHASE_PER	0x00000F00
+
+/*
+ * NOR Control Register (NORCR)
+ */
+#define IFC_NORCR_MASK			0x0F0F0000
+/* No. of Address/Data Phase; IFC_NORCR_NUM_PHASE(n) stores n - 1 */
+#define IFC_NORCR_NUM_PHASE_MASK	0x0F000000
+#define IFC_NORCR_NUM_PHASE_SHIFT	24
+#define IFC_NORCR_NUM_PHASE(n)	(((n) - 1) << IFC_NORCR_NUM_PHASE_SHIFT)
+/* Sequence Timeout Count */
+#define IFC_NORCR_STOCNT_MASK		0x000F0000
+#define IFC_NORCR_STOCNT_SHIFT		16
+#define IFC_NORCR_STOCNT(n)	((__ilog2(n) - 8) << IFC_NORCR_STOCNT_SHIFT)
+
+/*
+ * GPCM Machine specific registers
+ */
+/*
+ * GPCM Event and Error Status Register (GPCM_EVTER_STAT)
+ */
+/* Timeout error */
+#define IFC_GPCM_EVTER_STAT_TOER	0x04000000
+/* Parity error */
+#define IFC_GPCM_EVTER_STAT_PER		0x01000000
+
+/*
+ * GPCM Event and Error Enable Register (GPCM_EVTER_EN)
+ */
+/* Timeout error enable */
+#define IFC_GPCM_EVTER_EN_TOER_EN	0x04000000
+/* Parity error enable */
+#define IFC_GPCM_EVTER_EN_PER_EN	0x01000000
+
+/*
+ * GPCM Event and Error Interrupt Enable Register (GPCM_EVTER_INTR_EN)
+ */
+/* Enable Interrupt for timeout error */
+#define IFC_GPCM_EEIER_TOERIR_EN	0x04000000
+/* Enable Interrupt for Parity error */
+#define IFC_GPCM_EEIER_PERIR_EN		0x01000000
+
+/*
+ * GPCM Transfer Error Attribute Register-0 (GPCM_ERATTR0)
+ */
+/* Source ID for error transaction */
+#define IFC_GPCM_ERATTR0_ERSRCID	0xFF000000
+/* AXI ID for error transaction */
+#define IFC_GPCM_ERATTR0_ERAID		0x000FF000
+/* Chip select corresponds to GPCM error */
+#define IFC_GPCM_ERATTR0_ERCS_CS0	0x00000000
+#define IFC_GPCM_ERATTR0_ERCS_CS1	0x00000040
+#define IFC_GPCM_ERATTR0_ERCS_CS2	0x00000080
+#define IFC_GPCM_ERATTR0_ERCS_CS3	0x000000C0
+/* Type of transaction read/Write */
+#define IFC_GPCM_ERATTR0_ERTYPE_READ	0x00000001
+
+/*
+ * GPCM Transfer Error Attribute Register-2 (GPCM_ERATTR2)
+ */
+/* On which beat of address/data parity error is observed */
+#define IFC_GPCM_ERATTR2_PERR_BEAT		0x00000C00
+/* Parity Error on byte */
+#define IFC_GPCM_ERATTR2_PERR_BYTE		0x000000F0
+/* Parity Error reported in addr or data phase */
+#define IFC_GPCM_ERATTR2_PERR_DATA_PHASE	0x00000001
+
+/*
+ * GPCM Status Register (GPCM_STAT)
+ */
+#define IFC_GPCM_STAT_BSY		0x80000000  /* GPCM is busy */
+
+/*
+ * IFC Controller NAND Machine registers
+ */
+struct fsl_ifc_nand {
+	__be32 ncfgr;
+	u32 res1[0x4];
+	__be32 nand_fcr0;
+	__be32 nand_fcr1;
+	u32 res2[0x8];
+	__be32 row0;
+	u32 res3;
+	__be32 col0;
+	u32 res4;
+	__be32 row1;
+	u32 res5;
+	__be32 col1;
+	u32 res6;
+	__be32 row2;
+	u32 res7;
+	__be32 col2;
+	u32 res8;
+	__be32 row3;
+	u32 res9;
+	__be32 col3;
+	u32 res10[0x24];
+	__be32 nand_fbcr;
+	u32 res11;
+	__be32 nand_fir0;
+	__be32 nand_fir1;
+	__be32 nand_fir2;
+	u32 res12[0x10];
+	__be32 nand_csel;
+	u32 res13;
+	__be32 nandseq_strt;
+	u32 res14;
+	__be32 nand_evter_stat;
+	u32 res15;
+	__be32 pgrdcmpl_evt_stat;
+	u32 res16[0x2];
+	__be32 nand_evter_en;
+	u32 res17[0x2];
+	__be32 nand_evter_intr_en;
+	u32 res18[0x2];
+	__be32 nand_erattr0;
+	__be32 nand_erattr1;
+	u32 res19[0x10];
+	__be32 nand_fsr;
+	u32 res20;
+	__be32 nand_eccstat[4];
+	u32 res21[0x20];
+	__be32 nanndcr;
+	u32 res22[0x2];
+	__be32 nand_autoboot_trgr;
+	u32 res23;
+	__be32 nand_mdr;
+	u32 res24[0x5C];
+};
+
+/*
+ * IFC controller NOR Machine registers
+ */
+struct fsl_ifc_nor {
+	__be32 nor_evter_stat;
+	u32 res1[0x2];
+	__be32 nor_evter_en;
+	u32 res2[0x2];
+	__be32 nor_evter_intr_en;
+	u32 res3[0x2];
+	__be32 nor_erattr0;
+	__be32 nor_erattr1;
+	__be32 nor_erattr2;
+	u32 res4[0x4];
+	__be32 norcr;
+	u32 res5[0xEF];
+};
+
+/*
+ * IFC controller GPCM Machine registers
+ */
+struct fsl_ifc_gpcm {
+	__be32 gpcm_evter_stat;
+	u32 res1[0x2];
+	__be32 gpcm_evter_en;
+	u32 res2[0x2];
+	__be32 gpcm_evter_intr_en;
+	u32 res3[0x2];
+	__be32 gpcm_erattr0;
+	__be32 gpcm_erattr1;
+	__be32 gpcm_erattr2;
+	__be32 gpcm_stat;
+	u32 res4[0x1F3];
+};
+
+/*
+ * IFC Controller Registers
+ */
+struct fsl_ifc_regs {
+	__be32 ifc_rev;
+	u32 res1[0x3];
+	struct {
+		__be32 cspr;
+		u32 res2[0x2];
+	} cspr_cs[FSL_IFC_BANK_COUNT];
+	u32 res3[0x18];
+	struct {
+		__be32 amask;
+		u32 res4[0x2];
+	} amask_cs[FSL_IFC_BANK_COUNT];
+	u32 res5[0x18];
+	struct {
+		__be32 csor;
+		u32 res6[0x2];
+	} csor_cs[FSL_IFC_BANK_COUNT];
+	u32 res7[0x18];
+	struct {
+		__be32 ftim[4];
+		u32 res8[0x8];
+	} ftim_cs[FSL_IFC_BANK_COUNT];
+	u32 res9[0x60];
+	__be32 rb_stat;
+	u32 res10[0x2];
+	__be32 ifc_gcr;
+	u32 res11[0x2];
+	__be32 cm_evter_stat;
+	u32 res12[0x2];
+	__be32 cm_evter_en;
+	u32 res13[0x2];
+	__be32 cm_evter_intr_en;
+	u32 res14[0x2];
+	__be32 cm_erattr0;
+	__be32 cm_erattr1;
+	u32 res15[0x2];
+	__be32 ifc_ccr;
+	__be32 ifc_csr;
+	u32 res16[0x2EB];
+	struct fsl_ifc_nand ifc_nand;
+	struct fsl_ifc_nor ifc_nor;
+	struct fsl_ifc_gpcm ifc_gpcm;
+};
+
+extern unsigned int convert_ifc_address(phys_addr_t addr_base);
+extern int fsl_ifc_find(phys_addr_t addr_base);
+
+/* overview of the fsl ifc controller */
+
+struct fsl_ifc_ctrl {
+	/* device info */
+	struct device			*dev;
+	struct fsl_ifc_regs __iomem	*regs;	/* whole IFC register block */
+	int				irq;	/* controller-level irq */
+	int				nand_irq;	/* NAND machine irq */
+	spinlock_t			lock;	/* NOTE(review): no init seen in probe */
+	void				*nand;
+
+	u32 nand_stat;			/* status latched by check_nand_stat() */
+	wait_queue_head_t nand_wait;	/* woken by check_nand_stat() */
+};
+
+extern struct fsl_ifc_ctrl *fsl_ifc_ctrl_dev;
+
+
+#endif /* __ASM_FSL_IFC_H */
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index cf736ca..c71a63b 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o
 obj-$(CONFIG_FSL_PCI)		+= fsl_pci.o $(fsl-msi-obj-y)
 obj-$(CONFIG_FSL_PMC)		+= fsl_pmc.o
 obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
+obj-$(CONFIG_FSL_IFC)		+= fsl_ifc.o
 obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
 obj-$(CONFIG_MPC8xxx_GPIO)	+= mpc8xxx_gpio.o
 obj-$(CONFIG_FSL_85XX_CACHE_SRAM)	+= fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
diff --git a/arch/powerpc/sysdev/fsl_ifc.c b/arch/powerpc/sysdev/fsl_ifc.c
new file mode 100644
index 0000000..45c5eed
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_ifc.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2011 Freescale Semiconductor, Inc
+ *
+ * Freescale Integrated Flash Controller
+ *
+ * Author: Dipen Dudhat <Dipen.Dudhat@freescale.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <asm/prom.h>
+#include <asm/fsl_ifc.h>
+
+struct fsl_ifc_ctrl *fsl_ifc_ctrl_dev;
+EXPORT_SYMBOL(fsl_ifc_ctrl_dev);
+
+/*
+ * convert_ifc_address - keep only the CSPR_BA bits of a bank base address
+ * @addr_base:	base address of the memory bank
+ */
+unsigned int convert_ifc_address(phys_addr_t addr_base)
+{
+	return CSPR_BA & addr_base;
+}
+EXPORT_SYMBOL(convert_ifc_address);
+
+/*
+ * fsl_ifc_find - find IFC bank
+ * @addr_base:	base address of the memory bank
+ *
+ * Compares the "Base address" field of each valid (CSPR_V) CSPR register,
+ * read as a CPU-endian value via in_be32(), with @addr_base.  Returns the
+ * matching bank number (starting with 0), -ENODEV if the controller is
+ * not available, or -ENOENT if no bank matches.
+ */
+int fsl_ifc_find(phys_addr_t addr_base)
+{
+	unsigned int ba = convert_ifc_address(addr_base);
+	int i;
+
+	if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs)
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(fsl_ifc_ctrl_dev->regs->cspr_cs); i++) {
+		u32 cspr = in_be32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr);
+		if ((cspr & CSPR_V) && (cspr & CSPR_BA) == ba)
+			return i;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(fsl_ifc_find);
+
+/* Put the common (not machine-specific) event logic into a known state. */
+static int __devinit fsl_ifc_ctrl_init(struct fsl_ifc_ctrl *ctrl)
+{
+	struct fsl_ifc_regs __iomem *regs = ctrl->regs;
+	u32 pending = in_be32(&regs->cm_evter_stat);
+
+	/* clear a chip select error that is already flagged */
+	if (pending & IFC_CM_EVTER_STAT_CSER)
+		out_be32(&regs->cm_evter_stat, IFC_CM_EVTER_STAT_CSER);
+
+	/* enable chip select error checking ... */
+	out_be32(&regs->cm_evter_en, IFC_CM_EVTER_EN_CSEREN);
+
+	/* ... and its interrupt, then zero both error attribute registers */
+	out_be32(&regs->cm_evter_intr_en, IFC_CM_EVTER_INTR_EN_CSERIREN);
+	out_be32(&regs->cm_erattr0, 0x0);
+	out_be32(&regs->cm_erattr1, 0x0);
+
+	return 0;
+}
+
+/* Undo fsl_ifc_ctrl_probe(): release the IRQs, the mapping and the state. */
+static int fsl_ifc_ctrl_remove(struct platform_device *dev)
+{
+	struct fsl_ifc_ctrl *ctrl = dev_get_drvdata(&dev->dev);
+
+	/* unpublish first so fsl_ifc_find() cannot use a freed controller */
+	fsl_ifc_ctrl_dev = NULL;
+	free_irq(ctrl->nand_irq, ctrl);
+	free_irq(ctrl->irq, ctrl);
+	irq_dispose_mapping(ctrl->nand_irq);
+	irq_dispose_mapping(ctrl->irq);
+	iounmap(ctrl->regs);
+
+	dev_set_drvdata(&dev->dev, NULL);
+	kfree(ctrl);
+	return 0;
+}
+
+/*
+ * The NAND machine signals through two interrupts: an operational one
+ * that only carries OPC, and the error interrupt, which carries every
+ * other event (non-NAND errors included).  Both handlers end up here;
+ * whichever arrives first latches the status and wakes the wait queue.
+ */
+static DEFINE_SPINLOCK(nand_irq_lock);
+
+static u32 check_nand_stat(struct fsl_ifc_ctrl *ctrl)
+{
+	struct fsl_ifc_regs __iomem *regs = ctrl->regs;
+	unsigned long irqflags;
+	u32 events;
+
+	spin_lock_irqsave(&nand_irq_lock, irqflags);
+	events = in_be32(&regs->ifc_nand.nand_evter_stat);
+	if (!events)
+		goto unlock;
+
+	/* write the bits back to the status register, then publish them */
+	out_be32(&regs->ifc_nand.nand_evter_stat, events);
+	ctrl->nand_stat = events;
+	wake_up(&ctrl->nand_wait);
+unlock:
+	spin_unlock_irqrestore(&nand_irq_lock, irqflags);
+	return events;
+}
+
+/* NAND operational interrupt: claim it only when an event was pending. */
+static irqreturn_t fsl_ifc_nand_irq(int irqno, void *data)
+{
+	struct fsl_ifc_ctrl *ctrl = data;
+	irqreturn_t ret;
+
+	ret = check_nand_stat(ctrl) ? IRQ_HANDLED : IRQ_NONE;
+	return ret;
+}
+
+/*
+ * Controller-level interrupt: reports chip select errors, then passes any
+ * pending NAND event to check_nand_stat().  IRQ_HANDLED if either occurred.
+ */
+static irqreturn_t fsl_ifc_ctrl_irq(int irqno, void *data)
+{
+	struct fsl_ifc_ctrl *ctrl = data;
+	struct fsl_ifc_regs __iomem *ifc = ctrl->regs;
+	u32 err_axiid, err_srcid, status, cs_err, err_addr;
+	irqreturn_t ret = IRQ_NONE;
+
+	/* read for chip select error */
+	cs_err = in_be32(&ifc->cm_evter_stat);
+	if (cs_err) {
+		dev_err(ctrl->dev, "transaction sent to IFC is not mapped to "
+				"any memory bank 0x%08X\n", cs_err);
+		/* clear the chip select error */
+		out_be32(&ifc->cm_evter_stat, IFC_CM_EVTER_STAT_CSER);
+
+		/* read the error attribute registers and print what they hold */
+		status = in_be32(&ifc->cm_erattr0);
+		err_addr = in_be32(&ifc->cm_erattr1);
+
+		if (status & IFC_CM_ERATTR0_ERTYP_READ)
+			dev_err(ctrl->dev, "Read transaction error "
+				"CM_ERATTR0 0x%08X\n", status);
+		else
+			dev_err(ctrl->dev, "Write transaction error "
+				"CM_ERATTR0 0x%08X\n", status);
+
+		err_axiid = (status & IFC_CM_ERATTR0_ERAID) >>
+					IFC_CM_ERATTR0_ERAID_SHIFT;
+		dev_err(ctrl->dev, "AXI ID of the error "
+					"transaction 0x%08X\n", err_axiid);
+
+		err_srcid = (status & IFC_CM_ERATTR0_ESRCID) >>
+					IFC_CM_ERATTR0_ESRCID_SHIFT;
+		dev_err(ctrl->dev, "SRC ID of the error "
+					"transaction 0x%08X\n", err_srcid);
+
+		dev_err(ctrl->dev, "Transaction Address corresponding to error "
+					"ERADDR 0x%08X\n", err_addr);
+
+		ret = IRQ_HANDLED;
+	}
+
+	if (check_nand_stat(ctrl))
+		ret = IRQ_HANDLED;
+
+	return ret;
+}
+
+/*
+ * fsl_ifc_ctrl_probe
+ *
+ * Called by the device layer when it finds a device this driver can
+ * handle.  Allocates the controller-level resources only (the NAND banks
+ * get theirs in the chip probe function) and releases them again on failure.
+ */
+static int __devinit fsl_ifc_ctrl_probe(struct platform_device *dev)
+{
+	int ret = -ENODEV;	/* result of a failed resource lookup below */
+
+	dev_info(&dev->dev, "Freescale Integrated Flash Controller\n");
+
+	fsl_ifc_ctrl_dev = kzalloc(sizeof(*fsl_ifc_ctrl_dev), GFP_KERNEL);
+	if (!fsl_ifc_ctrl_dev)
+		return -ENOMEM;
+
+	dev_set_drvdata(&dev->dev, fsl_ifc_ctrl_dev);
+	fsl_ifc_ctrl_dev->dev = &dev->dev;
+	init_waitqueue_head(&fsl_ifc_ctrl_dev->nand_wait);
+
+	/* IOMAP the entire IFC region */
+	fsl_ifc_ctrl_dev->regs = of_iomap(dev->dev.of_node, 0);
+	if (!fsl_ifc_ctrl_dev->regs) {
+		dev_err(&dev->dev, "failed to get memory region\n");
+		goto err_free;
+	}
+
+	/* get the Controller level irq */
+	fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0);
+	if (fsl_ifc_ctrl_dev->irq == NO_IRQ) {
+		dev_err(&dev->dev, "failed to get irq resource for IFC\n");
+		goto err_unmap;
+	}
+
+	/* get the nand machine irq */
+	fsl_ifc_ctrl_dev->nand_irq = irq_of_parse_and_map(dev->dev.of_node, 1);
+	if (fsl_ifc_ctrl_dev->nand_irq == NO_IRQ) {
+		dev_err(&dev->dev, "failed to get irq resource for NAND Machine\n");
+		goto err_dispose_irq;
+	}
+
+	ret = fsl_ifc_ctrl_init(fsl_ifc_ctrl_dev);
+	if (ret < 0)
+		goto err_dispose_nand_irq;
+
+	ret = request_irq(fsl_ifc_ctrl_dev->irq, fsl_ifc_ctrl_irq, IRQF_SHARED,
+			  "fsl-ifc", fsl_ifc_ctrl_dev);
+	if (ret != 0) {
+		dev_err(&dev->dev, "failed to install irq (%d)\n",
+			fsl_ifc_ctrl_dev->irq);
+		goto err_dispose_nand_irq;
+	}
+
+	ret = request_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_nand_irq, 0,
+			  "fsl-ifc-nand", fsl_ifc_ctrl_dev);
+	if (ret != 0) {
+		dev_err(&dev->dev, "failed to install irq (%d)\n",
+			fsl_ifc_ctrl_dev->nand_irq);
+		goto err_free_irq;
+	}
+
+	return 0;
+
+err_free_irq:
+	free_irq(fsl_ifc_ctrl_dev->irq, fsl_ifc_ctrl_dev);
+err_dispose_nand_irq:
+	irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq);
+err_dispose_irq:
+	irq_dispose_mapping(fsl_ifc_ctrl_dev->irq);
+err_unmap:
+	iounmap(fsl_ifc_ctrl_dev->regs);
+err_free:
+	dev_set_drvdata(&dev->dev, NULL);
+	kfree(fsl_ifc_ctrl_dev);
+	fsl_ifc_ctrl_dev = NULL;
+	return ret;
+}
+
+/* Device tree match table: binds to nodes compatible with "fsl,ifc". */
+static const struct of_device_id fsl_ifc_match[] = {
+	{ .compatible = "fsl,ifc" },
+	{ /* sentinel */ },
+};
+
+/* Platform driver glue for the IFC controller. */
+static struct platform_driver fsl_ifc_ctrl_driver = {
+	.probe	= fsl_ifc_ctrl_probe,
+	.remove	= fsl_ifc_ctrl_remove,
+	.driver	= {
+		.of_match_table	= fsl_ifc_match,
+		.name		= "fsl-ifc",
+	},
+};
+
+/*
+ * Module entry point: registers the IFC controller platform driver.
+ *
+ * Returns the result of platform_driver_register(); a failure is also
+ * logged.
+ */
+static __init int fsl_ifc_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&fsl_ifc_ctrl_driver);
+	if (ret)
+		printk(KERN_ERR "fsl-ifc: Failed to register platform driver\n");
+
+	return ret;
+}
+
+/* Module exit point: unregisters the IFC controller platform driver. */
+static void __exit fsl_ifc_exit(void)
+{
+	platform_driver_unregister(&fsl_ifc_ctrl_driver);
+}
+
+/* Hook the init/exit routines into the module loader and describe the module. */
+module_init(fsl_ifc_init);
+module_exit(fsl_ifc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Freescale Semiconductor");
+MODULE_DESCRIPTION("Freescale Integrated Flash Controller driver");
-- 
1.7.1

^ permalink raw reply related

* [PATCH] mtd/nand : set Nand flash page address to FBAR and FPAR correctly
From: b35362 @ 2011-10-31  9:38 UTC (permalink / raw)
  To: dwmw2, Artem.Bityutskiy
  Cc: r58472, linux-kernel, linux-mtd, scottwood, akpm, linuxppc-dev
In-Reply-To: <1320053901-23801-1-git-send-email-b35362@freescale.com>

From: Liu Shuo <b35362@freescale.com>

If we use a NAND flash chip whose number of pages per block is greater
than 64 (for large page), we must treat the low bit of FBAR as being the
high bit of the page address. This is due to a limitation of the FCM: it
simply uses the low 6 bits (for large page) of the combined block/page
address as the FPAR component, rather than considering the actual block size.

Signed-off-by: Liu Shuo <b35362@freescale.com>
Signed-off-by: Jerry Huang <Chang-Ming.Huang@freescale.com>
Signed-off-by: Tang Yuantian <b29983@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
---
 drivers/mtd/nand/fsl_elbc_nand.c |   13 ++++++++++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 33d8aad..681d8c5 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -167,15 +167,22 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob)
 
 	elbc_fcm_ctrl->page = page_addr;
 
-	out_be32(&lbc->fbar,
-	         page_addr >> (chip->phys_erase_shift - chip->page_shift));
-
 	if (priv->page_size) {
+		/*
+		 * large page size chip : FPAR[PI] save the lowest 6 bits,
+		 *                        FBAR[BLK] save the other bits.
+		 */
+		out_be32(&lbc->fbar, page_addr >> 6);
 		out_be32(&lbc->fpar,
 		         ((page_addr << FPAR_LP_PI_SHIFT) & FPAR_LP_PI) |
 		         (oob ? FPAR_LP_MS : 0) | column);
 		buf_num = (page_addr & 1) << 2;
 	} else {
+		/*
+		 * small page size chip : FPAR[PI] save the lowest 5 bits,
+		 *                        FBAR[BLK] save the other bits.
+		 */
+		out_be32(&lbc->fbar, page_addr >> 5);
 		out_be32(&lbc->fpar,
 		         ((page_addr << FPAR_SP_PI_SHIFT) & FPAR_SP_PI) |
 		         (oob ? FPAR_SP_MS : 0) | column);
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox